You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/05/27 16:01:38 UTC

[arrow] branch master updated: ARROW-5027: [Python] Python bindings for JSON reader

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 207b350  ARROW-5027: [Python] Python bindings for JSON reader
207b350 is described below

commit 207b3507be82e92ebf29ec7d6d3b0bb86091c09a
Author: Philipp Moritz <pc...@gmail.com>
AuthorDate: Mon May 27 18:01:27 2019 +0200

    ARROW-5027: [Python] Python bindings for JSON reader
    
    This PR implements Python bindings for the JSON reader.
    
    Author: Philipp Moritz <pc...@gmail.com>
    
    Closes #4044 from pcmoritz/cython-read-json and squashes the following commits:
    
    1148f43ed <Philipp Moritz> update
    465e9d416 <Philipp Moritz> fixes and docstring
    d75b02d18 <Philipp Moritz> add tests
    b630845a7 <Philipp Moritz> temp commit
    aa0aa3f85 <Philipp Moritz> update
    b1742b00e <Philipp Moritz> update
    9dfc978cf <Philipp Moritz> add absolute imports
    c776e0f5e <Philipp Moritz> linting
    bb614282b <Philipp Moritz> comment in again
    46a8561fe <Philipp Moritz> update
    619064571 <Philipp Moritz> update
    364971c86 <Philipp Moritz> update
    90a3510a8 <Philipp Moritz> update
    4edb201f6 <Philipp Moritz> initial work on JSON reader python wrapper
---
 python/CMakeLists.txt                    |   2 +-
 python/pyarrow/__init__.pxd              |   2 +
 python/pyarrow/__init__.py               |   2 +
 python/pyarrow/_csv.pyx                  |   2 +
 python/pyarrow/_cuda.pxd                 |   2 +
 python/pyarrow/_cuda.pyx                 |   2 +
 python/pyarrow/_flight.pyx               |   2 +
 python/pyarrow/_json.pyx                 | 194 +++++++++++++++++++++++++++++++
 python/pyarrow/_orc.pxd                  |   2 +
 python/pyarrow/_orc.pyx                  |   2 +
 python/pyarrow/_parquet.pxd              |   2 +
 python/pyarrow/_parquet.pyx              |   2 +
 python/pyarrow/_plasma.pyx               |   2 +
 python/pyarrow/benchmark.py              |   2 +
 python/pyarrow/compat.py                 |   2 +
 python/pyarrow/csv.py                    |   2 +
 python/pyarrow/cuda.py                   |   2 +
 python/pyarrow/feather.py                |   2 +
 python/pyarrow/filesystem.py             |   2 +
 python/pyarrow/flight.py                 |   2 +
 python/pyarrow/gandiva.pyx               |   2 +
 python/pyarrow/hdfs.py                   |   2 +
 python/pyarrow/includes/libarrow.pxd     |  31 +++++
 python/pyarrow/ipc.py                    |   2 +
 python/pyarrow/{benchmark.py => json.py} |   4 +-
 python/pyarrow/jvm.py                    |   2 +
 python/pyarrow/lib.pxd                   |   2 +
 python/pyarrow/lib.pyx                   |   2 +
 python/pyarrow/orc.py                    |   2 +
 python/pyarrow/pandas_compat.py          |   2 +
 python/pyarrow/parquet.py                |   2 +
 python/pyarrow/plasma.py                 |   2 +
 python/pyarrow/serialization.py          |   2 +
 python/pyarrow/tests/test_json.py        | 147 +++++++++++++++++++++++
 python/pyarrow/types.py                  |   2 +
 python/pyarrow/util.py                   |   2 +
 python/setup.py                          |   1 +
 37 files changed, 438 insertions(+), 3 deletions(-)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 8f0a4d0..d7f1aba 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -378,7 +378,7 @@ if(UNIX)
   set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
 endif()
 
-set(CYTHON_EXTENSIONS lib _csv)
+set(CYTHON_EXTENSIONS lib _csv _json)
 
 set(LINK_LIBS arrow_shared arrow_python_shared)
 
diff --git a/python/pyarrow/__init__.pxd b/python/pyarrow/__init__.pxd
index 4f43455..95cea5c 100644
--- a/python/pyarrow/__init__.pxd
+++ b/python/pyarrow/__init__.pxd
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 from libcpp.memory cimport shared_ptr
 from pyarrow.includes.libarrow cimport (CArray, CBuffer, CColumn, CDataType,
                                         CField, CRecordBatch, CSchema,
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 17916df..117b1d7 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -17,6 +17,8 @@
 
 # flake8: noqa
 
+from __future__ import absolute_import
+
 import os as _os
 import sys as _sys
 
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index cfed987..0cc424d 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -20,6 +20,8 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
+from __future__ import absolute_import
+
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
 from pyarrow.lib cimport (check_status, Field, MemoryPool, ensure_type,
diff --git a/python/pyarrow/_cuda.pxd b/python/pyarrow/_cuda.pxd
index 1180601..fb66413 100644
--- a/python/pyarrow/_cuda.pxd
+++ b/python/pyarrow/_cuda.pxd
@@ -17,6 +17,8 @@
 
 # cython: language_level = 3
 
+from __future__ import absolute_import
+
 from pyarrow.lib cimport *
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
diff --git a/python/pyarrow/_cuda.pyx b/python/pyarrow/_cuda.pyx
index 87be0e6..a9f51b0 100644
--- a/python/pyarrow/_cuda.pyx
+++ b/python/pyarrow/_cuda.pyx
@@ -16,6 +16,8 @@
 # under the License.
 
 
+from __future__ import absolute_import
+
 from pyarrow.compat import tobytes
 from pyarrow.lib cimport *
 from pyarrow.includes.libarrow_cuda cimport *
diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx
index 806796f..c682635 100644
--- a/python/pyarrow/_flight.pyx
+++ b/python/pyarrow/_flight.pyx
@@ -17,6 +17,8 @@
 
 # cython: language_level = 3
 
+from __future__ import absolute_import
+
 import collections
 import enum
 
diff --git a/python/pyarrow/_json.pyx b/python/pyarrow/_json.pyx
new file mode 100644
index 0000000..b5c839b
--- /dev/null
+++ b/python/pyarrow/_json.pyx
@@ -0,0 +1,194 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+from __future__ import absolute_import
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.lib cimport (check_status, Field, MemoryPool, ensure_type,
+                          maybe_unbox_memory_pool, get_input_stream,
+                          pyarrow_wrap_table, pyarrow_wrap_data_type,
+                          pyarrow_unwrap_data_type, pyarrow_wrap_schema,
+                          pyarrow_unwrap_schema)
+
+
+cdef class ReadOptions:
+    """
+    Options for reading JSON files.
+
+    Parameters
+    ----------
+    use_threads : bool, optional (default True)
+        Whether to use multiple threads to accelerate reading
+    block_size : int, optional
+        How many bytes to process at a time from the input stream.
+        This will determine multi-threading granularity as well as
+        the size of individual chunks in the Table.
+    """
+    cdef:
+        CJSONReadOptions options
+
+    # Avoid mistakenly creating attributes
+    __slots__ = ()
+
+    def __init__(self, use_threads=None, block_size=None):
+        self.options = CJSONReadOptions.Defaults()
+        if use_threads is not None:
+            self.use_threads = use_threads
+        if block_size is not None:
+            self.block_size = block_size
+
+    @property
+    def use_threads(self):
+        """
+        Whether to use multiple threads to accelerate reading.
+        """
+        return self.options.use_threads
+
+    @use_threads.setter
+    def use_threads(self, value):
+        self.options.use_threads = value
+
+    @property
+    def block_size(self):
+        """
+        How many bytes to process at a time from the input stream.
+        This will determine multi-threading granularity as well as
+        the size of individual chunks in the Table.
+        """
+        return self.options.block_size
+
+    @block_size.setter
+    def block_size(self, value):
+        self.options.block_size = value
+
+cdef class ParseOptions:
+    """
+    Options for parsing JSON files.
+
+    Parameters
+    ----------
+    explicit_schema : Schema, optional (default None)
+        Optional explicit schema (no type inference, ignores other fields).
+    newlines_in_values : bool, optional (default False)
+        Whether objects may be printed across multiple lines (for example
+        pretty printed). If false, input must end with an empty line.
+    """
+
+    cdef:
+        CJSONParseOptions options
+
+    __slots__ = ()
+
+    def __init__(self, explicit_schema=None, newlines_in_values=None):
+        self.options = CJSONParseOptions.Defaults()
+        if explicit_schema is not None:
+            self.explicit_schema = explicit_schema
+        if newlines_in_values is not None:
+            self.newlines_in_values = newlines_in_values
+
+    @property
+    def explicit_schema(self):
+        """
+        Optional explicit schema (no type inference, ignores other fields)
+        """
+        if self.options.explicit_schema.get() == NULL:
+            return None
+        else:
+            return pyarrow_wrap_schema(self.options.explicit_schema)
+
+    @explicit_schema.setter
+    def explicit_schema(self, value):
+        self.options.explicit_schema = pyarrow_unwrap_schema(value)
+
+    @property
+    def newlines_in_values(self):
+        """
+        Whether newline characters are allowed in JSON values.
+        Setting this to True reduces the performance of multi-threaded
+        JSON reading.
+        """
+        return self.options.newlines_in_values
+
+    @newlines_in_values.setter
+    def newlines_in_values(self, value):
+        self.options.newlines_in_values = value
+
+
+cdef _get_reader(input_file, shared_ptr[InputStream]* out):
+    use_memory_map = False
+    get_input_stream(input_file, use_memory_map, out)
+
+cdef _get_read_options(ReadOptions read_options, CJSONReadOptions* out):
+    if read_options is None:
+        out[0] = CJSONReadOptions.Defaults()
+    else:
+        out[0] = read_options.options
+
+cdef _get_parse_options(ParseOptions parse_options, CJSONParseOptions* out):
+    if parse_options is None:
+        out[0] = CJSONParseOptions.Defaults()
+    else:
+        out[0] = parse_options.options
+
+
+def read_json(input_file, read_options=None, parse_options=None,
+              MemoryPool memory_pool=None):
+    """
+    Read a Table from a stream of JSON data.
+
+    Parameters
+    ----------
+    input_file : string, path or file-like object
+        The location of JSON data.
+    read_options : ReadOptions, optional
+        Options for the JSON reader (see ReadOptions constructor for defaults)
+    parse_options : ParseOptions, optional
+        Options for the JSON parser
+        (see ParseOptions constructor for defaults)
+    memory_pool : MemoryPool, optional
+        Pool to allocate Table memory from
+
+    Returns
+    -------
+    :class:`pyarrow.Table`
+        Contents of the JSON file as an in-memory table.
+    """
+    cdef:
+        shared_ptr[InputStream] stream
+        CJSONReadOptions c_read_options
+        CJSONParseOptions c_parse_options
+        shared_ptr[CJSONReader] reader
+        shared_ptr[CTable] table
+
+    _get_reader(input_file, &stream)
+    _get_read_options(read_options, &c_read_options)
+    _get_parse_options(parse_options, &c_parse_options)
+
+    check_status(CJSONReader.Make(maybe_unbox_memory_pool(memory_pool),
+                                  stream, c_read_options, c_parse_options,
+                                  &reader))
+
+    with nogil:
+        check_status(reader.get().Read(&table))
+
+    return pyarrow_wrap_table(table)
diff --git a/python/pyarrow/_orc.pxd b/python/pyarrow/_orc.pxd
index 7304937..ebbf8be 100644
--- a/python/pyarrow/_orc.pxd
+++ b/python/pyarrow/_orc.pxd
@@ -18,6 +18,8 @@
 # distutils: language = c++
 # cython: language_level = 3
 
+from __future__ import absolute_import
+
 from libc.string cimport const_char
 from libcpp.vector cimport vector as std_vector
 from pyarrow.includes.common cimport *
diff --git a/python/pyarrow/_orc.pyx b/python/pyarrow/_orc.pyx
index 9493f23..c9f5b2e 100644
--- a/python/pyarrow/_orc.pyx
+++ b/python/pyarrow/_orc.pyx
@@ -19,6 +19,8 @@
 # distutils: language = c++
 # cython: embedsignature = True
 
+from __future__ import absolute_import
+
 from cython.operator cimport dereference as deref
 from libcpp.vector cimport vector as std_vector
 from pyarrow.includes.common cimport *
diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index 75c0015..8a6bf73 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -18,6 +18,8 @@
 # distutils: language = c++
 # cython: language_level = 3
 
+from __future__ import absolute_import
+
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport (CChunkedArray, CSchema, CStatus,
                                         CTable, CMemoryPool, CBuffer,
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index db7f0c4..a4300cd 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -19,6 +19,8 @@
 # distutils: language = c++
 # cython: embedsignature = True
 
+from __future__ import absolute_import
+
 import io
 import six
 import warnings
diff --git a/python/pyarrow/_plasma.pyx b/python/pyarrow/_plasma.pyx
index 5be724d..e352377 100644
--- a/python/pyarrow/_plasma.pyx
+++ b/python/pyarrow/_plasma.pyx
@@ -20,6 +20,8 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
+from __future__ import absolute_import
+
 from libcpp cimport bool as c_bool, nullptr
 from libcpp.memory cimport shared_ptr, unique_ptr, make_shared
 from libcpp.string cimport string as c_string
diff --git a/python/pyarrow/benchmark.py b/python/pyarrow/benchmark.py
index ef1ef53..e8e38a4 100644
--- a/python/pyarrow/benchmark.py
+++ b/python/pyarrow/benchmark.py
@@ -17,4 +17,6 @@
 
 # flake8: noqa
 
+from __future__ import absolute_import
+
 from pyarrow.lib import benchmark_PandasObjectIsNull
diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
index 0549b16..e37307c 100644
--- a/python/pyarrow/compat.py
+++ b/python/pyarrow/compat.py
@@ -17,6 +17,8 @@
 
 # flake8: noqa
 
+from __future__ import absolute_import
+
 import itertools
 
 import numpy as np
diff --git a/python/pyarrow/csv.py b/python/pyarrow/csv.py
index 8375ad4..62d9290 100644
--- a/python/pyarrow/csv.py
+++ b/python/pyarrow/csv.py
@@ -15,4 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 from pyarrow._csv import ReadOptions, ParseOptions, ConvertOptions, read_csv  # noqa
diff --git a/python/pyarrow/cuda.py b/python/pyarrow/cuda.py
index 29a217c..e4faa18 100644
--- a/python/pyarrow/cuda.py
+++ b/python/pyarrow/cuda.py
@@ -17,6 +17,8 @@
 
 # flake8: noqa
 
+from __future__ import absolute_import
+
 from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer,
                            HostBuffer, BufferReader, BufferWriter,
                            new_host_buffer,
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 93bcada..91b77cb 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 import os
 
 import six
diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py
index 98fb773..f941aa1 100644
--- a/python/pyarrow/filesystem.py
+++ b/python/pyarrow/filesystem.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 import os
 import inspect
 import posixpath
diff --git a/python/pyarrow/flight.py b/python/pyarrow/flight.py
index 7d32778..37a21e4 100644
--- a/python/pyarrow/flight.py
+++ b/python/pyarrow/flight.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 import sys
 
 if sys.version_info < (3,):
diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
index 3904a8a..8f23aa1 100644
--- a/python/pyarrow/gandiva.pyx
+++ b/python/pyarrow/gandiva.pyx
@@ -20,6 +20,8 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
+from __future__ import absolute_import
+
 from libcpp cimport bool as c_bool, nullptr
 from libcpp.memory cimport shared_ptr, unique_ptr, make_shared
 from libcpp.string cimport string as c_string
diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py
index 3ddd3cd..9d33ac7 100644
--- a/python/pyarrow/hdfs.py
+++ b/python/pyarrow/hdfs.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 import os
 import posixpath
 import sys
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 6656e73..8443c0c 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1045,6 +1045,37 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
         CStatus Read(shared_ptr[CTable]* out)
 
 
+cdef extern from "arrow/json/options.h" nogil:
+
+    cdef cppclass CJSONReadOptions" arrow::json::ReadOptions":
+        c_bool use_threads
+        int32_t block_size
+
+        @staticmethod
+        CJSONReadOptions Defaults()
+
+    cdef cppclass CJSONParseOptions" arrow::json::ParseOptions":
+        shared_ptr[CSchema] explicit_schema
+        c_bool newlines_in_values
+
+        @staticmethod
+        CJSONParseOptions Defaults()
+
+
+cdef extern from "arrow/json/reader.h" namespace "arrow::json" nogil:
+
+    cdef cppclass CJSONReader" arrow::json::TableReader":
+        @staticmethod
+        CStatus Make(CMemoryPool*, shared_ptr[InputStream],
+                     CJSONReadOptions, CJSONParseOptions,
+                     shared_ptr[CJSONReader]* out)
+
+        CStatus Read(shared_ptr[CTable]* out)
+
+    cdef CStatus ParseOne(CJSONParseOptions options, shared_ptr[CBuffer] json,
+                          shared_ptr[CRecordBatch]* out)
+
+
 cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
 
     cdef cppclass CFunctionContext" arrow::compute::FunctionContext":
diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py
index 78bb347..c162400 100644
--- a/python/pyarrow/ipc.py
+++ b/python/pyarrow/ipc.py
@@ -17,6 +17,8 @@
 
 # Arrow file and stream reader/writer classes, and other messaging tools
 
+from __future__ import absolute_import
+
 import pyarrow as pa
 
 from pyarrow.lib import (Message, MessageReader,  # noqa
diff --git a/python/pyarrow/benchmark.py b/python/pyarrow/json.py
similarity index 87%
copy from python/pyarrow/benchmark.py
copy to python/pyarrow/json.py
index ef1ef53..cfa2528 100644
--- a/python/pyarrow/benchmark.py
+++ b/python/pyarrow/json.py
@@ -15,6 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# flake8: noqa
+from __future__ import absolute_import
 
-from pyarrow.lib import benchmark_PandasObjectIsNull
+from pyarrow._json import ReadOptions, ParseOptions, read_json  # noqa
diff --git a/python/pyarrow/jvm.py b/python/pyarrow/jvm.py
index 2341f40..9a59e10 100644
--- a/python/pyarrow/jvm.py
+++ b/python/pyarrow/jvm.py
@@ -25,6 +25,8 @@ through jpype. Modules that talk to a remote JVM like py4j will not work as the
 memory addresses reported by them are not reachable in the python process.
 """
 
+from __future__ import absolute_import
+
 
 import pyarrow as pa
 
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index cb5c732..998848d 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -17,6 +17,8 @@
 
 # cython: language_level = 3
 
+from __future__ import absolute_import
+
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
 from pyarrow.includes.libarrow cimport CStatus
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index 894ced5..783e2b2 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -19,6 +19,8 @@
 # distutils: language = c++
 # cython: embedsignature = True
 
+from __future__ import absolute_import
+
 import datetime
 import decimal as _pydecimal
 import json
diff --git a/python/pyarrow/orc.py b/python/pyarrow/orc.py
index 39111e5..6c39407 100644
--- a/python/pyarrow/orc.py
+++ b/python/pyarrow/orc.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 from itertools import count
 from numbers import Integral
 
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index d90c8a2..8db97c0 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 import ast
 import json
 import operator
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 78f7c0f..d44deee 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 from collections import defaultdict
 from concurrent import futures
 from functools import partial
diff --git a/python/pyarrow/plasma.py b/python/pyarrow/plasma.py
index 13b3eec..748de97 100644
--- a/python/pyarrow/plasma.py
+++ b/python/pyarrow/plasma.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 import contextlib
 import os
 import pyarrow as pa
diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index fe170b2..3a605a9 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import
+
 import collections
 import six
 import sys
diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py
new file mode 100644
index 0000000..7885455
--- /dev/null
+++ b/python/pyarrow/tests/test_json.py
@@ -0,0 +1,147 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import io
+import unittest
+
+import pytest
+
+import pyarrow as pa
+from pyarrow.json import read_json, ReadOptions, ParseOptions
+
+
+def test_read_options():
+    cls = ReadOptions
+    opts = cls()
+
+    assert opts.block_size > 0
+    opts.block_size = 12345
+    assert opts.block_size == 12345
+
+    assert opts.use_threads is True
+    opts.use_threads = False
+    assert opts.use_threads is False
+
+    opts = cls(block_size=1234, use_threads=False)
+    assert opts.block_size == 1234
+    assert opts.use_threads is False
+
+
+def test_parse_options():
+    cls = ParseOptions
+    opts = cls()
+    assert opts.newlines_in_values is False
+    assert opts.explicit_schema is None
+
+    opts.newlines_in_values = True
+    assert opts.newlines_in_values is True
+
+    schema = pa.schema([pa.field('foo', pa.int32())])
+    opts.explicit_schema = schema
+    assert opts.explicit_schema == schema
+
+
+class BaseTestJSONRead:
+
+    def read_bytes(self, b, **kwargs):
+        return self.read_json(pa.py_buffer(b), **kwargs)
+
+    def check_names(self, table, names):
+        assert table.num_columns == len(names)
+        assert [c.name for c in table.columns] == names
+
+    def test_file_object(self):
+        data = b'{"a": 1, "b": 2}\n'
+        expected_data = {'a': [1], 'b': [2]}
+        bio = io.BytesIO(data)
+        table = self.read_json(bio)
+        assert table.to_pydict() == expected_data
+        # Text files not allowed
+        sio = io.StringIO(data.decode())
+        with pytest.raises(TypeError):
+            self.read_json(sio)
+
+    def test_simple_ints(self):
+        # Infer integer columns
+        rows = b'{"a": 1,"b": 2, "c": 3}\n{"a": 4,"b": 5, "c": 6}\n'
+        table = self.read_bytes(rows)
+        schema = pa.schema([('a', pa.int64()),
+                            ('b', pa.int64()),
+                            ('c', pa.int64())])
+        assert table.schema == schema
+        assert table.to_pydict() == {
+            'a': [1, 4],
+            'b': [2, 5],
+            'c': [3, 6],
+            }
+
+    def test_simple_varied(self):
+        # Infer various kinds of data
+        rows = (b'{"a": 1,"b": 2, "c": "3", "d": false}\n'
+                b'{"a": 4.0, "b": -5, "c": "foo", "d": true}\n')
+        table = self.read_bytes(rows)
+        schema = pa.schema([('a', pa.float64()),
+                            ('b', pa.int64()),
+                            ('c', pa.string()),
+                            ('d', pa.bool_())])
+        assert table.schema == schema
+        assert table.to_pydict() == {
+            'a': [1.0, 4.0],
+            'b': [2, -5],
+            'c': [u"3", u"foo"],
+            'd': [False, True],
+            }
+
+    def test_simple_nulls(self):
+        # Infer various kinds of data, with nulls
+        rows = (b'{"a": 1, "b": 2, "c": null, "d": null, "e": null}\n'
+                b'{"a": null, "b": -5, "c": "foo", "d": null, "e": true}\n'
+                b'{"a": 4.5, "b": null, "c": "nan", "d": null,"e": false}\n')
+        table = self.read_bytes(rows)
+        schema = pa.schema([('a', pa.float64()),
+                            ('b', pa.int64()),
+                            ('c', pa.string()),
+                            ('d', pa.null()),
+                            ('e', pa.bool_())])
+        assert table.schema == schema
+        assert table.to_pydict() == {
+            'a': [1.0, None, 4.5],
+            'b': [2, -5, None],
+            'c': [None, u"foo", u"nan"],
+            'd': [None, None, None],
+            'e': [None, True, False],
+            }
+
+
+class TestSerialJSONRead(BaseTestJSONRead, unittest.TestCase):
+
+    def read_json(self, *args, **kwargs):
+        read_options = kwargs.setdefault('read_options', ReadOptions())
+        read_options.use_threads = False
+        table = read_json(*args, **kwargs)
+        table._validate()
+        return table
+
+
+class TestParallelJSONRead(BaseTestJSONRead, unittest.TestCase):
+
+    def read_json(self, *args, **kwargs):
+        read_options = kwargs.setdefault('read_options', ReadOptions())
+        read_options.use_threads = True
+        table = read_json(*args, **kwargs)
+        table._validate()
+        return table
diff --git a/python/pyarrow/types.py b/python/pyarrow/types.py
index 2bd7027..def1dde 100644
--- a/python/pyarrow/types.py
+++ b/python/pyarrow/types.py
@@ -17,6 +17,8 @@
 
 # Tools for dealing with Arrow type metadata in Python
 
+from __future__ import absolute_import
+
 from pyarrow.lib import (is_boolean_value,  # noqa
                          is_integer_value,
                          is_float_value)
diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py
index 6c17f5c..5e4fb35 100644
--- a/python/pyarrow/util.py
+++ b/python/pyarrow/util.py
@@ -17,6 +17,8 @@
 
 # Miscellaneous utility code
 
+from __future__ import absolute_import
+
 import functools
 import six
 import warnings
diff --git a/python/setup.py b/python/setup.py
index 8a6b4e1..88fcba2 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -169,6 +169,7 @@ class build_ext(_build_ext):
     CYTHON_MODULE_NAMES = [
         'lib',
         '_csv',
+        '_json',
         '_cuda',
         '_flight',
         '_parquet',