You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/08 00:02:27 UTC
[2/2] arrow git commit: ARROW-31: Python: prototype user object model,
add PyList conversion path with type inference
ARROW-31: Python: prototype user object model, add PyList conversion path with type inference
Depends on ARROW-7. This is pretty mundane stuff, but we have to start somewhere. I'm going to do a little more in this patch (handle normal lists of strings and lists of other supported Python types) before merging.
Author: Wes McKinney <we...@apache.org>
Closes #19 from wesm/ARROW-31 and squashes the following commits:
2345541 [Wes McKinney] Test basic conversion of nested lists
1d4618b [Wes McKinney] Prototype string and double converters
b02b296 [Wes McKinney] Type inference for lists and lists-of-lists
8c3891c [Wes McKinney] Smoke test that array garbage collection deallocates memory
c28bf09 [Wes McKinney] Build array successfully, without validating contents
731544a [Wes McKinney] Move PrimitiveType::ToString template back to type.h
b5b5b82 [Wes McKinney] Failing test stubs, raise on null array
edb451c [Wes McKinney] Add a few data type smoke tests
47fd78e [Wes McKinney] Add unit test stub
07c1379 [Wes McKinney] Move some bits from arrow/type.h to type.cc
3a774fb [Wes McKinney] Add Status::ToString impls. Unit test stub
4e206fc [Wes McKinney] Add pandas converter placeholder
102ed36 [Wes McKinney] Cython array box scaffold builds
94f122f [Wes McKinney] Basic object model for sequence->arrow conversions
bdb02e7 [Wes McKinney] Use shared_ptr with dynamic make_builder too
d5655ba [Wes McKinney] Clean up array builder API to return shared_ptr<Array>
4132bda [Wes McKinney] Essential scaffolding -- error handling, memory pools, etc. -- to work toward converting Python lists to Arrow arrays
55e69a2 [Wes McKinney] Typed array stubs
ac8c796 [Wes McKinney] Cache primitive data type instances
8f7edaf [Wes McKinney] Consolidate Field and data type subclasses. Add more Python stubs
ea2f3ec [Wes McKinney] Bootstrap end-to-end exposure in Python, wrap DataType and Field types
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/9afb6677
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/9afb6677
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/9afb6677
Branch: refs/heads/master
Commit: 9afb667783b8cedbe6e9d6ee5eb02d35cf1d0f79
Parents: 571343b
Author: Wes McKinney <we...@apache.org>
Authored: Mon Mar 7 15:02:56 2016 -0800
Committer: Wes McKinney <we...@apache.org>
Committed: Mon Mar 7 15:02:56 2016 -0800
----------------------------------------------------------------------
cpp/CMakeLists.txt | 83 ++---
cpp/src/arrow/CMakeLists.txt | 1 -
cpp/src/arrow/api.h | 21 ++
cpp/src/arrow/builder.h | 10 +-
cpp/src/arrow/field.cc | 31 --
cpp/src/arrow/field.h | 63 ----
cpp/src/arrow/table/CMakeLists.txt | 15 -
cpp/src/arrow/table/column-test.cc | 1 -
cpp/src/arrow/table/column.cc | 2 +-
cpp/src/arrow/table/column.h | 2 +-
cpp/src/arrow/table/schema-test.cc | 9 +-
cpp/src/arrow/table/schema.cc | 2 +-
cpp/src/arrow/table/schema.h | 1 -
cpp/src/arrow/table/table-test.cc | 1 -
cpp/src/arrow/table/table.cc | 2 +-
cpp/src/arrow/table/test-common.h | 1 -
cpp/src/arrow/type.cc | 49 +++
cpp/src/arrow/type.h | 143 ++++++--
cpp/src/arrow/types/CMakeLists.txt | 22 +-
cpp/src/arrow/types/boolean.h | 3 +-
cpp/src/arrow/types/construct.cc | 21 +-
cpp/src/arrow/types/construct.h | 6 +-
cpp/src/arrow/types/json.cc | 5 +-
cpp/src/arrow/types/list-test.cc | 24 +-
cpp/src/arrow/types/list.cc | 12 -
cpp/src/arrow/types/list.h | 51 +--
cpp/src/arrow/types/null.h | 34 --
cpp/src/arrow/types/primitive-test.cc | 64 ++--
cpp/src/arrow/types/primitive.h | 22 +-
cpp/src/arrow/types/string-test.cc | 11 +-
cpp/src/arrow/types/string.h | 41 +--
cpp/src/arrow/types/struct-test.cc | 19 +-
cpp/src/arrow/types/struct.cc | 18 -
cpp/src/arrow/types/struct.h | 21 +-
cpp/src/arrow/util/CMakeLists.txt | 20 +-
cpp/src/arrow/util/buffer.cc | 8 +
cpp/src/arrow/util/buffer.h | 2 +
cpp/src/arrow/util/status.cc | 40 +++
python/CMakeLists.txt | 21 +-
python/arrow/__init__.py | 34 ++
python/arrow/array.pxd | 85 +++++
python/arrow/array.pyx | 179 ++++++++++
python/arrow/config.pyx | 2 +-
python/arrow/error.pxd | 20 ++
python/arrow/error.pyx | 30 ++
python/arrow/includes/arrow.pxd | 75 ++++-
python/arrow/includes/common.pxd | 4 +-
python/arrow/includes/pyarrow.pxd | 24 +-
python/arrow/scalar.pxd | 47 +++
python/arrow/scalar.pyx | 28 ++
python/arrow/schema.pxd | 39 +++
python/arrow/schema.pyx | 150 +++++++++
python/arrow/tests/test_array.py | 26 ++
python/arrow/tests/test_convert_builtin.py | 85 +++++
python/arrow/tests/test_schema.py | 51 +++
python/setup.py | 7 +-
python/src/pyarrow/adapters/builtin.cc | 415 ++++++++++++++++++++++++
python/src/pyarrow/adapters/builtin.h | 40 +++
python/src/pyarrow/adapters/pandas.h | 28 ++
python/src/pyarrow/api.h | 7 +
python/src/pyarrow/common.cc | 71 ++++
python/src/pyarrow/common.h | 95 ++++++
python/src/pyarrow/helpers.cc | 57 ++++
python/src/pyarrow/helpers.h | 34 ++
python/src/pyarrow/init.cc | 8 +-
python/src/pyarrow/init.h | 8 +-
python/src/pyarrow/status.cc | 92 ++++++
python/src/pyarrow/status.h | 144 ++++++++
68 files changed, 2290 insertions(+), 497 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 8042661..e8cb88c 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -37,18 +37,17 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
endif()
-# Enable using a custom GCC toolchain to build Arrow
-if (NOT "$ENV{ARROW_GCC_ROOT}" STREQUAL "")
- set(GCC_ROOT $ENV{ARROW_GCC_ROOT})
- set(CMAKE_C_COMPILER ${GCC_ROOT}/bin/gcc)
- set(CMAKE_CXX_COMPILER ${GCC_ROOT}/bin/g++)
-endif()
-
if(APPLE)
# In newer versions of CMake, this is the default setting
set(CMAKE_MACOSX_RPATH 1)
endif()
+find_program(CCACHE_FOUND ccache)
+if(CCACHE_FOUND)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+endif(CCACHE_FOUND)
+
# ----------------------------------------------------------------------
# cmake options
@@ -126,38 +125,16 @@ endif ()
# Add common flags
set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}")
-# Required to avoid static linking errors with dependencies
-add_definitions(-fPIC)
-
# Determine compiler version
include(CompilerInfo)
if ("${COMPILER_FAMILY}" STREQUAL "clang")
- # Clang helpfully provides a few extensions from C++11 such as the 'override'
- # keyword on methods. This doesn't change behavior, and we selectively enable
- # it in src/gutil/port.h only on clang. So, we can safely use it, and don't want
- # to trigger warnings when we do so.
- # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-extensions")
-
# Using Clang with ccache causes a bunch of spurious warnings that are
# purportedly fixed in the next version of ccache. See the following for details:
#
# http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html
# http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments")
-
- # Only hardcode -fcolor-diagnostics if stderr is opened on a terminal. Otherwise
- # the color codes show up as noisy artifacts.
- #
- # This test is imperfect because 'cmake' and 'make' can be run independently
- # (with different terminal options), and we're testing during the former.
- execute_process(COMMAND test -t 2 RESULT_VARIABLE ARROW_IS_TTY)
- if ((${ARROW_IS_TTY} EQUAL 0) AND (NOT ("$ENV{TERM}" STREQUAL "dumb")))
- message("Running in a controlling terminal")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics")
- else()
- message("Running without a controlling terminal or in a dumb terminal")
- endif()
endif()
# Sanity check linking option.
@@ -279,12 +256,6 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
include_directories(src)
############################################################
-# Visibility
-############################################################
-# For generate_export_header() and add_compiler_export_flags().
-include(GenerateExportHeader)
-
-############################################################
# Testing
############################################################
@@ -456,21 +427,32 @@ endif()
# Subdirectories
############################################################
-add_subdirectory(src/arrow)
-add_subdirectory(src/arrow/util)
-add_subdirectory(src/arrow/table)
-add_subdirectory(src/arrow/types)
-
-set(LINK_LIBS
- arrow_util
- arrow_table
- arrow_types)
+set(LIBARROW_LINK_LIBS
+)
set(ARROW_SRCS
src/arrow/array.cc
src/arrow/builder.cc
- src/arrow/field.cc
src/arrow/type.cc
+
+ src/arrow/table/column.cc
+ src/arrow/table/schema.cc
+ src/arrow/table/table.cc
+
+ src/arrow/types/construct.cc
+ src/arrow/types/floating.cc
+ src/arrow/types/integer.cc
+ src/arrow/types/json.cc
+ src/arrow/types/list.cc
+ src/arrow/types/primitive.cc
+ src/arrow/types/string.cc
+ src/arrow/types/struct.cc
+ src/arrow/types/union.cc
+
+ src/arrow/util/bit-util.cc
+ src/arrow/util/buffer.cc
+ src/arrow/util/memory-pool.cc
+ src/arrow/util/status.cc
)
set(LIBARROW_LINKAGE "SHARED")
@@ -479,8 +461,15 @@ add_library(arrow
${LIBARROW_LINKAGE}
${ARROW_SRCS}
)
-target_link_libraries(arrow ${LINK_LIBS})
-set_target_properties(arrow PROPERTIES LINKER_LANGUAGE CXX)
+set_target_properties(arrow
+ PROPERTIES
+ LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+target_link_libraries(arrow ${LIBARROW_LINK_LIBS})
+
+add_subdirectory(src/arrow)
+add_subdirectory(src/arrow/util)
+add_subdirectory(src/arrow/table)
+add_subdirectory(src/arrow/types)
install(TARGETS arrow
LIBRARY DESTINATION lib
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 102a8a1..77326ce 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -20,7 +20,6 @@ install(FILES
api.h
array.h
builder.h
- field.h
type.h
DESTINATION include/arrow)
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 899e8aa..c73d4b3 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -15,7 +15,28 @@
// specific language governing permissions and limitations
// under the License.
+// Coarse public API while the library is in development
+
#ifndef ARROW_API_H
#define ARROW_API_H
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/type.h"
+
+#include "arrow/table/column.h"
+#include "arrow/table/schema.h"
+#include "arrow/table/table.h"
+
+#include "arrow/types/boolean.h"
+#include "arrow/types/construct.h"
+#include "arrow/types/floating.h"
+#include "arrow/types/integer.h"
+#include "arrow/types/list.h"
+#include "arrow/types/string.h"
+#include "arrow/types/struct.h"
+
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
+
#endif // ARROW_API_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 491b913..8cc689c 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -32,7 +32,7 @@ class Array;
class MemoryPool;
class PoolBuffer;
-static constexpr int32_t MIN_BUILDER_CAPACITY = 1 << 8;
+static constexpr int32_t MIN_BUILDER_CAPACITY = 1 << 5;
// Base class for all data array builders
class ArrayBuilder {
@@ -78,12 +78,16 @@ class ArrayBuilder {
// Creates new array object to hold the contents of the builder and transfers
// ownership of the data
- virtual Status ToArray(Array** out) = 0;
+ virtual std::shared_ptr<Array> Finish() = 0;
+
+ const std::shared_ptr<DataType>& type() const {
+ return type_;
+ }
protected:
MemoryPool* pool_;
- TypePtr type_;
+ std::shared_ptr<DataType> type_;
// When nulls are first appended to the builder, the null bitmap is allocated
std::shared_ptr<PoolBuffer> nulls_;
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/field.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/field.cc b/cpp/src/arrow/field.cc
deleted file mode 100644
index 4568d90..0000000
--- a/cpp/src/arrow/field.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/field.h"
-
-#include <sstream>
-#include <string>
-
-namespace arrow {
-
-std::string Field::ToString() const {
- std::stringstream ss;
- ss << this->name << " " << this->type->ToString();
- return ss.str();
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/field.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/field.h b/cpp/src/arrow/field.h
deleted file mode 100644
index 89a450c..0000000
--- a/cpp/src/arrow/field.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_FIELD_H
-#define ARROW_FIELD_H
-
-#include <string>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-// A field is a piece of metadata that includes (for now) a name and a data
-// type
-
-struct Field {
- // Field name
- std::string name;
-
- // The field's data type
- TypePtr type;
-
- Field(const std::string& name, const TypePtr& type) :
- name(name),
- type(type) {}
-
- bool operator==(const Field& other) const {
- return this->Equals(other);
- }
-
- bool operator!=(const Field& other) const {
- return !this->Equals(other);
- }
-
- bool Equals(const Field& other) const {
- return (this == &other) || (this->name == other.name &&
- this->type->Equals(other.type.get()));
- }
-
- bool nullable() const {
- return this->type->nullable;
- }
-
- std::string ToString() const;
-};
-
-} // namespace arrow
-
-#endif // ARROW_FIELD_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt
index 68bf314..26d843d 100644
--- a/cpp/src/arrow/table/CMakeLists.txt
+++ b/cpp/src/arrow/table/CMakeLists.txt
@@ -19,21 +19,6 @@
# arrow_table
#######################################
-set(TABLE_SRCS
- column.cc
- schema.cc
- table.cc
-)
-
-set(TABLE_LIBS
-)
-
-add_library(arrow_table STATIC
- ${TABLE_SRCS}
-)
-target_link_libraries(arrow_table ${TABLE_LIBS})
-SET_TARGET_PROPERTIES(arrow_table PROPERTIES LINKER_LANGUAGE CXX)
-
# Headers: top level
install(FILES
column.h
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/column-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc
index 4959b82..bf95932 100644
--- a/cpp/src/arrow/table/column-test.cc
+++ b/cpp/src/arrow/table/column-test.cc
@@ -21,7 +21,6 @@
#include <string>
#include <vector>
-#include "arrow/field.h"
#include "arrow/table/column.h"
#include "arrow/table/schema.h"
#include "arrow/table/test-common.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/column.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc
index d68b491..573e650 100644
--- a/cpp/src/arrow/table/column.cc
+++ b/cpp/src/arrow/table/column.cc
@@ -20,7 +20,7 @@
#include <memory>
#include <sstream>
-#include "arrow/field.h"
+#include "arrow/type.h"
#include "arrow/util/status.h"
namespace arrow {
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/column.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h
index 64423bf..dfc7516 100644
--- a/cpp/src/arrow/table/column.h
+++ b/cpp/src/arrow/table/column.h
@@ -23,7 +23,7 @@
#include <vector>
#include "arrow/array.h"
-#include "arrow/field.h"
+#include "arrow/type.h"
namespace arrow {
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema-test.cc b/cpp/src/arrow/table/schema-test.cc
index 0cf1b3c..d6725cc 100644
--- a/cpp/src/arrow/table/schema-test.cc
+++ b/cpp/src/arrow/table/schema-test.cc
@@ -20,7 +20,6 @@
#include <string>
#include <vector>
-#include "arrow/field.h"
#include "arrow/table/schema.h"
#include "arrow/type.h"
#include "arrow/types/string.h"
@@ -97,10 +96,10 @@ TEST_F(TestSchema, ToString) {
auto schema = std::make_shared<Schema>(fields);
std::string result = schema->ToString();
- std::string expected = R"(f0 ?int32
-f1 uint8
-f2 ?string
-f3 ?list<?int16>
+ std::string expected = R"(f0 int32
+f1 uint8 not null
+f2 string
+f3 list<int16>
)";
ASSERT_EQ(expected, result);
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.cc b/cpp/src/arrow/table/schema.cc
index fb3b4d6..d49d0a7 100644
--- a/cpp/src/arrow/table/schema.cc
+++ b/cpp/src/arrow/table/schema.cc
@@ -22,7 +22,7 @@
#include <sstream>
#include <vector>
-#include "arrow/field.h"
+#include "arrow/type.h"
namespace arrow {
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.h b/cpp/src/arrow/table/schema.h
index d04e3f6..103f01b 100644
--- a/cpp/src/arrow/table/schema.h
+++ b/cpp/src/arrow/table/schema.h
@@ -22,7 +22,6 @@
#include <string>
#include <vector>
-#include "arrow/field.h"
#include "arrow/type.h"
namespace arrow {
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table-test.cc b/cpp/src/arrow/table/table-test.cc
index dd4f74c..c4fdb06 100644
--- a/cpp/src/arrow/table/table-test.cc
+++ b/cpp/src/arrow/table/table-test.cc
@@ -21,7 +21,6 @@
#include <string>
#include <vector>
-#include "arrow/field.h"
#include "arrow/table/column.h"
#include "arrow/table/schema.h"
#include "arrow/table/table.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table.cc b/cpp/src/arrow/table/table.cc
index 4cefc92..0c788b8 100644
--- a/cpp/src/arrow/table/table.cc
+++ b/cpp/src/arrow/table/table.cc
@@ -20,9 +20,9 @@
#include <memory>
#include <sstream>
-#include "arrow/field.h"
#include "arrow/table/column.h"
#include "arrow/table/schema.h"
+#include "arrow/type.h"
#include "arrow/util/status.h"
namespace arrow {
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/test-common.h b/cpp/src/arrow/table/test-common.h
index efe2f22..50a5f6a 100644
--- a/cpp/src/arrow/table/test-common.h
+++ b/cpp/src/arrow/table/test-common.h
@@ -21,7 +21,6 @@
#include <string>
#include <vector>
-#include "arrow/field.h"
#include "arrow/table/column.h"
#include "arrow/table/schema.h"
#include "arrow/table/table.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index ff145e2..2657708 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -17,8 +17,56 @@
#include "arrow/type.h"
+#include <sstream>
+#include <string>
+
namespace arrow {
+std::string Field::ToString() const {
+ std::stringstream ss;
+ ss << this->name << " " << this->type->ToString();
+ return ss.str();
+}
+
+DataType::~DataType() {}
+
+StringType::StringType(bool nullable)
+ : DataType(LogicalType::STRING, nullable) {}
+
+StringType::StringType(const StringType& other)
+ : StringType(other.nullable) {}
+
+std::string StringType::ToString() const {
+ std::string result(name());
+ if (!nullable) {
+ result.append(" not null");
+ }
+ return result;
+}
+
+std::string ListType::ToString() const {
+ std::stringstream s;
+ s << "list<" << value_type->ToString() << ">";
+ if (!this->nullable) {
+ s << " not null";
+ }
+ return s.str();
+}
+
+std::string StructType::ToString() const {
+ std::stringstream s;
+ s << "struct<";
+ for (size_t i = 0; i < fields_.size(); ++i) {
+ if (i > 0) s << ", ";
+ const std::shared_ptr<Field>& field = fields_[i];
+ s << field->name << ": " << field->type->ToString();
+ }
+ s << ">";
+ if (!nullable) s << " not null";
+ return s.str();
+}
+
+const std::shared_ptr<NullType> NA = std::make_shared<NullType>();
const std::shared_ptr<BooleanType> BOOL = std::make_shared<BooleanType>();
const std::shared_ptr<UInt8Type> UINT8 = std::make_shared<UInt8Type>();
const std::shared_ptr<UInt16Type> UINT16 = std::make_shared<UInt16Type>();
@@ -30,5 +78,6 @@ const std::shared_ptr<Int32Type> INT32 = std::make_shared<Int32Type>();
const std::shared_ptr<Int64Type> INT64 = std::make_shared<Int64Type>();
const std::shared_ptr<FloatType> FLOAT = std::make_shared<FloatType>();
const std::shared_ptr<DoubleType> DOUBLE = std::make_shared<DoubleType>();
+const std::shared_ptr<StringType> STRING = std::make_shared<StringType>();
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 4193a0e..e78e494 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -20,6 +20,7 @@
#include <memory>
#include <string>
+#include <vector>
namespace arrow {
@@ -71,49 +72,46 @@ struct LogicalType {
UINT64 = 7,
INT64 = 8,
- // A boolean value represented as 1 byte
- BOOL = 9,
-
// A boolean value represented as 1 bit
- BIT = 10,
+ BOOL = 9,
// 4-byte floating point value
- FLOAT = 11,
+ FLOAT = 10,
// 8-byte floating point value
- DOUBLE = 12,
+ DOUBLE = 11,
// CHAR(N): fixed-length UTF8 string with length N
- CHAR = 13,
+ CHAR = 12,
// UTF8 variable-length string as List<Char>
- STRING = 14,
+ STRING = 13,
// VARCHAR(N): Null-terminated string type embedded in a CHAR(N + 1)
- VARCHAR = 15,
+ VARCHAR = 14,
// Variable-length bytes (no guarantee of UTF8-ness)
- BINARY = 16,
+ BINARY = 15,
// By default, int32 days since the UNIX epoch
- DATE = 17,
+ DATE = 16,
// Exact timestamp encoded with int64 since UNIX epoch
// Default unit millisecond
- TIMESTAMP = 18,
+ TIMESTAMP = 17,
// Timestamp as double seconds since the UNIX epoch
- TIMESTAMP_DOUBLE = 19,
+ TIMESTAMP_DOUBLE = 18,
// Exact time encoded with int64, default unit millisecond
- TIME = 20,
+ TIME = 19,
// Precision- and scale-based decimal type. Storage type depends on the
// parameters.
- DECIMAL = 21,
+ DECIMAL = 20,
// Decimal value encoded as a text string
- DECIMAL_TEXT = 22,
+ DECIMAL_TEXT = 21,
// A list of some logical data type
LIST = 30,
@@ -141,7 +139,9 @@ struct DataType {
type(type),
nullable(nullable) {}
- virtual bool Equals(const DataType* other) {
+ virtual ~DataType();
+
+ bool Equals(const DataType* other) {
// Call with a pointer so more friendly to subclasses
return this == other || (this->type == other->type &&
this->nullable == other->nullable);
@@ -154,10 +154,45 @@ struct DataType {
virtual std::string ToString() const = 0;
};
-
typedef std::shared_ptr<LayoutType> LayoutPtr;
typedef std::shared_ptr<DataType> TypePtr;
+// A field is a piece of metadata that includes (for now) a name and a data
+// type
+struct Field {
+ // Field name
+ std::string name;
+
+ // The field's data type
+ TypePtr type;
+
+ Field(const std::string& name, const TypePtr& type) :
+ name(name),
+ type(type) {}
+
+ bool operator==(const Field& other) const {
+ return this->Equals(other);
+ }
+
+ bool operator!=(const Field& other) const {
+ return !this->Equals(other);
+ }
+
+ bool Equals(const Field& other) const {
+ return (this == &other) || (this->name == other.name &&
+ this->type->Equals(other.type.get()));
+ }
+
+ bool Equals(const std::shared_ptr<Field>& other) const {
+ return Equals(*other.get());
+ }
+
+ bool nullable() const {
+ return this->type->nullable;
+ }
+
+ std::string ToString() const;
+};
struct BytesType : public LayoutType {
int size;
@@ -183,16 +218,18 @@ struct PrimitiveType : public DataType {
explicit PrimitiveType(bool nullable = true)
: DataType(Derived::type_enum, nullable) {}
- virtual std::string ToString() const {
- std::string result;
- if (nullable) {
- result.append("?");
- }
- result.append(static_cast<const Derived*>(this)->name());
- return result;
- }
+ std::string ToString() const override;
};
+template <typename Derived>
+inline std::string PrimitiveType<Derived>::ToString() const {
+ std::string result(static_cast<const Derived*>(this)->name());
+ if (!nullable) {
+ result.append(" not null");
+ }
+ return result;
+}
+
#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
typedef C_TYPE c_type; \
static constexpr LogicalType::type type_enum = LogicalType::ENUM; \
@@ -205,6 +242,10 @@ struct PrimitiveType : public DataType {
return NAME; \
}
+struct NullType : public PrimitiveType<NullType> {
+ PRIMITIVE_DECL(NullType, void, NA, 0, "null");
+};
+
struct BooleanType : public PrimitiveType<BooleanType> {
PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool");
};
@@ -249,6 +290,55 @@ struct DoubleType : public PrimitiveType<DoubleType> {
PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double");
};
+struct ListType : public DataType {
+ // List can contain any other logical value type
+ TypePtr value_type;
+
+ explicit ListType(const TypePtr& value_type, bool nullable = true)
+ : DataType(LogicalType::LIST, nullable),
+ value_type(value_type) {}
+
+ static char const *name() {
+ return "list";
+ }
+
+ std::string ToString() const override;
+};
+
+// String is a logical type consisting of a physical list of 1-byte values
+struct StringType : public DataType {
+ explicit StringType(bool nullable = true);
+
+ StringType(const StringType& other);
+
+ static char const *name() {
+ return "string";
+ }
+
+ std::string ToString() const override;
+};
+
+struct StructType : public DataType {
+ std::vector<std::shared_ptr<Field> > fields_;
+
+ explicit StructType(const std::vector<std::shared_ptr<Field> >& fields,
+ bool nullable = true)
+ : DataType(LogicalType::STRUCT, nullable) {
+ fields_ = fields;
+ }
+
+ const std::shared_ptr<Field>& field(int i) const {
+ return fields_[i];
+ }
+
+ int num_children() const {
+ return fields_.size();
+ }
+
+ std::string ToString() const override;
+};
+
+extern const std::shared_ptr<NullType> NA;
extern const std::shared_ptr<BooleanType> BOOL;
extern const std::shared_ptr<UInt8Type> UINT8;
extern const std::shared_ptr<UInt16Type> UINT16;
@@ -260,6 +350,7 @@ extern const std::shared_ptr<Int32Type> INT32;
extern const std::shared_ptr<Int64Type> INT64;
extern const std::shared_ptr<FloatType> FLOAT;
extern const std::shared_ptr<DoubleType> DOUBLE;
+extern const std::shared_ptr<StringType> STRING;
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt
index e090aea..57cabde 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -19,31 +19,11 @@
# arrow_types
#######################################
-set(TYPES_SRCS
- construct.cc
- floating.cc
- integer.cc
- json.cc
- list.cc
- primitive.cc
- string.cc
- struct.cc
- union.cc
-)
-
-set(TYPES_LIBS
-)
-
-add_library(arrow_types STATIC
- ${TYPES_SRCS}
-)
-target_link_libraries(arrow_types ${TYPES_LIBS})
-SET_TARGET_PROPERTIES(arrow_types PROPERTIES LINKER_LANGUAGE CXX)
-
# Headers: top level
install(FILES
boolean.h
collection.h
+ construct.h
datetime.h
decimal.h
floating.h
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/boolean.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h
index 8fc9cfd..a5023d7 100644
--- a/cpp/src/arrow/types/boolean.h
+++ b/cpp/src/arrow/types/boolean.h
@@ -24,7 +24,8 @@ namespace arrow {
typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
-// typedef PrimitiveBuilder<BooleanType, BooleanArray> BooleanBuilder;
+class BooleanBuilder : public ArrayBuilder {
+};
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 05d6b27..43f01a3 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -32,13 +32,13 @@ class ArrayBuilder;
// Initially looked at doing this with vtables, but shared pointers makes it
// difficult
-#define BUILDER_CASE(ENUM, BuilderType) \
- case LogicalType::ENUM: \
- *out = static_cast<ArrayBuilder*>(new BuilderType(pool, type)); \
+#define BUILDER_CASE(ENUM, BuilderType) \
+ case LogicalType::ENUM: \
+ out->reset(new BuilderType(pool, type)); \
return Status::OK();
-Status make_builder(MemoryPool* pool, const TypePtr& type,
- ArrayBuilder** out) {
+Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+ std::shared_ptr<ArrayBuilder>* out) {
switch (type->type) {
BUILDER_CASE(UINT8, UInt8Builder);
BUILDER_CASE(INT8, Int8Builder);
@@ -58,13 +58,12 @@ Status make_builder(MemoryPool* pool, const TypePtr& type,
case LogicalType::LIST:
{
- ListType* list_type = static_cast<ListType*>(type.get());
- ArrayBuilder* value_builder;
- RETURN_NOT_OK(make_builder(pool, list_type->value_type, &value_builder));
+ std::shared_ptr<ArrayBuilder> value_builder;
- // The ListBuilder takes ownership of the value_builder
- ListBuilder* builder = new ListBuilder(pool, type, value_builder);
- *out = static_cast<ArrayBuilder*>(builder);
+ const std::shared_ptr<DataType>& value_type = static_cast<ListType*>(
+ type.get())->value_type;
+ RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
+ out->reset(new ListBuilder(pool, type, value_builder));
return Status::OK();
}
// BUILDER_CASE(CHAR, CharBuilder);
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/construct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h
index b5ba436..59ebe1a 100644
--- a/cpp/src/arrow/types/construct.h
+++ b/cpp/src/arrow/types/construct.h
@@ -18,6 +18,8 @@
#ifndef ARROW_TYPES_CONSTRUCT_H
#define ARROW_TYPES_CONSTRUCT_H
+#include <memory>
+
#include "arrow/type.h"
namespace arrow {
@@ -26,8 +28,8 @@ class ArrayBuilder;
class MemoryPool;
class Status;
-Status make_builder(MemoryPool* pool, const TypePtr& type,
- ArrayBuilder** out);
+Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+ std::shared_ptr<ArrayBuilder>* out);
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/json.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.cc b/cpp/src/arrow/types/json.cc
index b29b957..168e370 100644
--- a/cpp/src/arrow/types/json.cc
+++ b/cpp/src/arrow/types/json.cc
@@ -19,10 +19,7 @@
#include <vector>
-#include "arrow/types/boolean.h"
-#include "arrow/types/integer.h"
-#include "arrow/types/floating.h"
-#include "arrow/types/null.h"
+#include "arrow/type.h"
#include "arrow/types/string.h"
#include "arrow/types/union.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index b4bbd28..02991de 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -32,6 +32,7 @@
#include "arrow/types/test-common.h"
#include "arrow/util/status.h"
+using std::shared_ptr;
using std::string;
using std::unique_ptr;
using std::vector;
@@ -47,17 +48,18 @@ TEST(TypesTest, TestListType) {
ASSERT_EQ(list_type.type, LogicalType::LIST);
ASSERT_EQ(list_type.name(), string("list"));
- ASSERT_EQ(list_type.ToString(), string("?list<?uint8>"));
+ ASSERT_EQ(list_type.ToString(), string("list<uint8>"));
ASSERT_EQ(list_type.value_type->type, vt->type);
ASSERT_EQ(list_type.value_type->type, vt->type);
std::shared_ptr<DataType> st = std::make_shared<StringType>(false);
std::shared_ptr<DataType> lt = std::make_shared<ListType>(st, false);
- ASSERT_EQ(lt->ToString(), string("list<string>"));
+ ASSERT_EQ(lt->ToString(), string("list<string not null> not null"));
ListType lt2(lt, false);
- ASSERT_EQ(lt2.ToString(), string("list<list<string>>"));
+ ASSERT_EQ(lt2.ToString(),
+ string("list<list<string not null> not null> not null"));
}
// ----------------------------------------------------------------------
@@ -71,23 +73,21 @@ class TestListBuilder : public TestBuilder {
value_type_ = TypePtr(new Int32Type());
type_ = TypePtr(new ListType(value_type_));
- ArrayBuilder* tmp;
- ASSERT_OK(make_builder(pool_, type_, &tmp));
- builder_.reset(static_cast<ListBuilder*>(tmp));
+ std::shared_ptr<ArrayBuilder> tmp;
+ ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+ builder_ = std::dynamic_pointer_cast<ListBuilder>(tmp);
}
void Done() {
- Array* out;
- ASSERT_OK(builder_->ToArray(&out));
- result_.reset(static_cast<ListArray*>(out));
+ result_ = std::dynamic_pointer_cast<ListArray>(builder_->Finish());
}
protected:
TypePtr value_type_;
TypePtr type_;
- unique_ptr<ListBuilder> builder_;
- unique_ptr<ListArray> result_;
+ shared_ptr<ListBuilder> builder_;
+ shared_ptr<ListArray> result_;
};
@@ -116,7 +116,7 @@ TEST_F(TestListBuilder, TestBasics) {
vector<int> lengths = {3, 0, 4};
vector<uint8_t> is_null = {0, 1, 0};
- Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder());
+ Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
int pos = 0;
for (size_t i = 0; i < lengths.size(); ++i) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/list.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc
index 577d71d..69a79a7 100644
--- a/cpp/src/arrow/types/list.cc
+++ b/cpp/src/arrow/types/list.cc
@@ -17,18 +17,6 @@
#include "arrow/types/list.h"
-#include <sstream>
-#include <string>
-
namespace arrow {
-std::string ListType::ToString() const {
- std::stringstream s;
- if (this->nullable) {
- s << "?";
- }
- s << "list<" << value_type->ToString() << ">";
- return s.str();
-}
-
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index f39fe5c..f40a824 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -36,21 +36,6 @@ namespace arrow {
class MemoryPool;
-struct ListType : public DataType {
- // List can contain any other logical value type
- TypePtr value_type;
-
- explicit ListType(const TypePtr& value_type, bool nullable = true)
- : DataType(LogicalType::LIST, nullable),
- value_type(value_type) {}
-
- static char const *name() {
- return "list";
- }
-
- virtual std::string ToString() const;
-};
-
class ListArray : public Array {
public:
ListArray() : Array(), offset_buf_(nullptr), offsets_(nullptr) {}
@@ -106,10 +91,9 @@ class ListArray : public Array {
class ListBuilder : public Int32Builder {
public:
ListBuilder(MemoryPool* pool, const TypePtr& type,
- ArrayBuilder* value_builder)
- : Int32Builder(pool, type) {
- value_builder_.reset(value_builder);
- }
+ std::shared_ptr<ArrayBuilder> value_builder)
+ : Int32Builder(pool, type),
+ value_builder_(value_builder) {}
Status Init(int32_t elements) {
// One more than requested.
@@ -147,30 +131,27 @@ class ListBuilder : public Int32Builder {
return Status::OK();
}
- // Initialize an array type instance with the results of this builder
- // Transfers ownership of all buffers
template <typename Container>
- Status Transfer(Container* out) {
- Array* child_values;
- RETURN_NOT_OK(value_builder_->ToArray(&child_values));
+ std::shared_ptr<Array> Transfer() {
+ auto result = std::make_shared<Container>();
+
+ std::shared_ptr<Array> items = value_builder_->Finish();
// Add final offset if the length is non-zero
if (length_) {
- raw_buffer()[length_] = child_values->length();
+ raw_buffer()[length_] = items->length();
}
- out->Init(type_, length_, values_, ArrayPtr(child_values),
+ result->Init(type_, length_, values_, items,
null_count_, nulls_);
values_ = nulls_ = nullptr;
capacity_ = length_ = null_count_ = 0;
- return Status::OK();
+
+ return result;
}
- virtual Status ToArray(Array** out) {
- ListArray* result = new ListArray();
- RETURN_NOT_OK(Transfer(result));
- *out = static_cast<Array*>(result);
- return Status::OK();
+ std::shared_ptr<Array> Finish() override {
+ return Transfer<ListArray>();
}
// Start a new variable-length list slot
@@ -198,10 +179,12 @@ class ListBuilder : public Int32Builder {
return Append(true);
}
- ArrayBuilder* value_builder() const { return value_builder_.get();}
+ const std::shared_ptr<ArrayBuilder>& value_builder() const {
+ return value_builder_;
+ }
protected:
- std::unique_ptr<ArrayBuilder> value_builder_;
+ std::shared_ptr<ArrayBuilder> value_builder_;
};
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/null.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/null.h b/cpp/src/arrow/types/null.h
deleted file mode 100644
index c67f752..0000000
--- a/cpp/src/arrow/types/null.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_NULL_H
-#define ARROW_TYPES_NULL_H
-
-#include <string>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-struct NullType : public PrimitiveType<NullType> {
- PRIMITIVE_DECL(NullType, void, NA, 0, "null");
-};
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_NULL_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index 02eaaa7..f35a258 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -37,6 +37,7 @@
#include "arrow/util/status.h"
using std::string;
+using std::shared_ptr;
using std::unique_ptr;
using std::vector;
@@ -98,12 +99,12 @@ class TestPrimitiveBuilder : public TestBuilder {
type_ = Attrs::type();
- ArrayBuilder* tmp;
- ASSERT_OK(make_builder(pool_, type_, &tmp));
- builder_.reset(static_cast<BuilderType*>(tmp));
+ std::shared_ptr<ArrayBuilder> tmp;
+ ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+ builder_ = std::dynamic_pointer_cast<BuilderType>(tmp);
- ASSERT_OK(make_builder(pool_, type_, &tmp));
- builder_nn_.reset(static_cast<BuilderType*>(tmp));
+ ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+ builder_nn_ = std::dynamic_pointer_cast<BuilderType>(tmp);
}
void RandomData(int N, double pct_null = 0.1) {
@@ -112,7 +113,6 @@ class TestPrimitiveBuilder : public TestBuilder {
}
void CheckNullable() {
- ArrayType result;
ArrayType expected;
int size = builder_->length();
@@ -125,7 +125,9 @@ class TestPrimitiveBuilder : public TestBuilder {
int32_t ex_null_count = null_count(nulls_);
expected.Init(size, ex_data, ex_null_count, ex_nulls);
- ASSERT_OK(builder_->Transfer(&result));
+
+ std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
+ builder_->Finish());
// Builder is now reset
ASSERT_EQ(0, builder_->length());
@@ -133,12 +135,11 @@ class TestPrimitiveBuilder : public TestBuilder {
ASSERT_EQ(0, builder_->null_count());
ASSERT_EQ(nullptr, builder_->buffer());
- ASSERT_TRUE(result.Equals(expected));
- ASSERT_EQ(ex_null_count, result.null_count());
+ ASSERT_TRUE(result->Equals(expected));
+ ASSERT_EQ(ex_null_count, result->null_count());
}
void CheckNonNullable() {
- ArrayType result;
ArrayType expected;
int size = builder_nn_->length();
@@ -146,22 +147,24 @@ class TestPrimitiveBuilder : public TestBuilder {
size * sizeof(T));
expected.Init(size, ex_data);
- ASSERT_OK(builder_nn_->Transfer(&result));
+
+ std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
+ builder_nn_->Finish());
// Builder is now reset
ASSERT_EQ(0, builder_nn_->length());
ASSERT_EQ(0, builder_nn_->capacity());
ASSERT_EQ(nullptr, builder_nn_->buffer());
- ASSERT_TRUE(result.Equals(expected));
- ASSERT_EQ(0, result.null_count());
+ ASSERT_TRUE(result->Equals(expected));
+ ASSERT_EQ(0, result->null_count());
}
protected:
TypePtr type_;
TypePtr type_nn_;
- unique_ptr<BuilderType> builder_;
- unique_ptr<BuilderType> builder_nn_;
+ shared_ptr<BuilderType> builder_;
+ shared_ptr<BuilderType> builder_nn_;
vector<T> draws_;
vector<uint8_t> nulls_;
@@ -225,15 +228,36 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {
ASSERT_OK(this->builder_->AppendNull());
}
- Array* result;
- ASSERT_OK(this->builder_->ToArray(&result));
- unique_ptr<Array> holder(result);
+ auto result = this->builder_->Finish();
for (int i = 0; i < size; ++i) {
ASSERT_TRUE(result->IsNull(i));
}
}
+TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
+ DECL_T();
+
+ int size = 10000;
+
+ vector<T>& draws = this->draws_;
+ vector<uint8_t>& nulls = this->nulls_;
+
+ int64_t memory_before = this->pool_->bytes_allocated();
+
+ this->RandomData(size);
+
+ int i;
+ for (i = 0; i < size; ++i) {
+ ASSERT_OK(this->builder_->Append(draws[i], nulls[i] > 0));
+ }
+
+ do {
+ std::shared_ptr<Array> result = this->builder_->Finish();
+ } while (false);
+
+ ASSERT_EQ(memory_before, this->pool_->bytes_allocated());
+}
TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
DECL_T();
@@ -331,11 +355,11 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) {
}
TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
- int n = 100;
- ASSERT_OK(this->builder_->Reserve(n));
+ ASSERT_OK(this->builder_->Reserve(10));
ASSERT_EQ(0, this->builder_->length());
ASSERT_EQ(MIN_BUILDER_CAPACITY, this->builder_->capacity());
+ ASSERT_OK(this->builder_->Reserve(90));
ASSERT_OK(this->builder_->Advance(100));
ASSERT_OK(this->builder_->Reserve(MIN_BUILDER_CAPACITY));
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index 09d43e7..1073bb6 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -64,6 +64,8 @@ class PrimitiveArrayImpl : public PrimitiveArray {
PrimitiveArrayImpl() : PrimitiveArray() {}
+ virtual ~PrimitiveArrayImpl() {}
+
PrimitiveArrayImpl(int32_t length, const std::shared_ptr<Buffer>& data,
int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr) {
@@ -197,24 +199,12 @@ class PrimitiveBuilder : public ArrayBuilder {
return Status::OK();
}
- // Initialize an array type instance with the results of this builder
- // Transfers ownership of all buffers
- Status Transfer(PrimitiveArray* out) {
- out->Init(type_, length_, values_, null_count_, nulls_);
+ std::shared_ptr<Array> Finish() override {
+ std::shared_ptr<ArrayType> result = std::make_shared<ArrayType>();
+ result->PrimitiveArray::Init(type_, length_, values_, null_count_, nulls_);
values_ = nulls_ = nullptr;
capacity_ = length_ = null_count_ = 0;
- return Status::OK();
- }
-
- Status Transfer(ArrayType* out) {
- return Transfer(static_cast<PrimitiveArray*>(out));
- }
-
- virtual Status ToArray(Array** out) {
- ArrayType* result = new ArrayType();
- RETURN_NOT_OK(Transfer(result));
- *out = static_cast<Array*>(result);
- return Status::OK();
+ return result;
}
value_type* raw_buffer() {
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index 9af6672..8e82fd9 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -166,23 +166,18 @@ class TestStringBuilder : public TestBuilder {
void SetUp() {
TestBuilder::SetUp();
type_ = TypePtr(new StringType());
-
- ArrayBuilder* tmp;
- ASSERT_OK(make_builder(pool_, type_, &tmp));
- builder_.reset(static_cast<StringBuilder*>(tmp));
+ builder_.reset(new StringBuilder(pool_, type_));
}
void Done() {
- Array* out;
- ASSERT_OK(builder_->ToArray(&out));
- result_.reset(static_cast<StringArray*>(out));
+ result_ = std::dynamic_pointer_cast<StringArray>(builder_->Finish());
}
protected:
TypePtr type_;
std::unique_ptr<StringBuilder> builder_;
- std::unique_ptr<StringArray> result_;
+ std::shared_ptr<StringArray> result_;
};
TEST_F(TestStringBuilder, TestScalarAppend) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index 5795cfe..8ccc0a9 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -71,28 +71,6 @@ struct VarcharType : public DataType {
static const LayoutPtr byte1(new BytesType(1));
static const LayoutPtr physical_string = LayoutPtr(new ListLayoutType(byte1));
-// String is a logical type consisting of a physical list of 1-byte values
-struct StringType : public DataType {
- explicit StringType(bool nullable = true)
- : DataType(LogicalType::STRING, nullable) {}
-
- StringType(const StringType& other)
- : StringType() {}
-
- static char const *name() {
- return "string";
- }
-
- virtual std::string ToString() const {
- std::string result;
- if (nullable) {
- result.append("?");
- }
- result.append(name());
- return result;
- }
-};
-
// TODO: add a BinaryArray layer in between
class StringArray : public ListArray {
public:
@@ -153,26 +131,23 @@ class StringArray : public ListArray {
class StringBuilder : public ListBuilder {
public:
explicit StringBuilder(MemoryPool* pool, const TypePtr& type) :
- ListBuilder(pool, type,
- static_cast<ArrayBuilder*>(new UInt8Builder(pool, value_type_))) {
+ ListBuilder(pool, type, std::make_shared<UInt8Builder>(pool, value_type_)) {
byte_builder_ = static_cast<UInt8Builder*>(value_builder_.get());
}
Status Append(const std::string& value) {
- RETURN_NOT_OK(ListBuilder::Append());
- return byte_builder_->Append(reinterpret_cast<const uint8_t*>(value.c_str()),
- value.size());
+ return Append(value.c_str(), value.size());
}
- Status Append(const uint8_t* value, int32_t length);
+ Status Append(const char* value, int32_t length) {
+ RETURN_NOT_OK(ListBuilder::Append());
+ return byte_builder_->Append(reinterpret_cast<const uint8_t*>(value), length);
+ }
Status Append(const std::vector<std::string>& values,
uint8_t* null_bytes);
- virtual Status ToArray(Array** out) {
- StringArray* result = new StringArray();
- RETURN_NOT_OK(ListBuilder::Transfer(result));
- *out = static_cast<Array*>(result);
- return Status::OK();
+ std::shared_ptr<Array> Finish() override {
+ return ListBuilder::Transfer<StringArray>();
}
protected:
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc
index df61571..9a4777e 100644
--- a/cpp/src/arrow/types/struct-test.cc
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -17,15 +17,16 @@
#include <gtest/gtest.h>
+#include <memory>
#include <string>
#include <vector>
-#include "arrow/field.h"
#include "arrow/type.h"
#include "arrow/types/integer.h"
#include "arrow/types/string.h"
#include "arrow/types/struct.h"
+using std::shared_ptr;
using std::string;
using std::vector;
@@ -33,23 +34,23 @@ namespace arrow {
TEST(TestStructType, Basics) {
TypePtr f0_type = TypePtr(new Int32Type());
- Field f0("f0", f0_type);
+ auto f0 = std::make_shared<Field>("f0", f0_type);
TypePtr f1_type = TypePtr(new StringType());
- Field f1("f1", f1_type);
+ auto f1 = std::make_shared<Field>("f1", f1_type);
TypePtr f2_type = TypePtr(new UInt8Type());
- Field f2("f2", f2_type);
+ auto f2 = std::make_shared<Field>("f2", f2_type);
- vector<Field> fields = {f0, f1, f2};
+ vector<shared_ptr<Field> > fields = {f0, f1, f2};
StructType struct_type(fields);
- ASSERT_TRUE(struct_type.field(0).Equals(f0));
- ASSERT_TRUE(struct_type.field(1).Equals(f1));
- ASSERT_TRUE(struct_type.field(2).Equals(f2));
+ ASSERT_TRUE(struct_type.field(0)->Equals(f0));
+ ASSERT_TRUE(struct_type.field(1)->Equals(f1));
+ ASSERT_TRUE(struct_type.field(2)->Equals(f2));
- ASSERT_EQ(struct_type.ToString(), "?struct<f0: ?int32, f1: ?string, f2: ?uint8>");
+ ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: uint8>");
// TODO: out of bounds for field(...)
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/struct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc
index 6b233bc..02af600 100644
--- a/cpp/src/arrow/types/struct.cc
+++ b/cpp/src/arrow/types/struct.cc
@@ -17,24 +17,6 @@
#include "arrow/types/struct.h"
-#include <cstdlib>
-#include <memory>
-#include <sstream>
-#include <string>
-
namespace arrow {
-std::string StructType::ToString() const {
- std::stringstream s;
- if (nullable) s << "?";
- s << "struct<";
- for (size_t i = 0; i < fields_.size(); ++i) {
- if (i > 0) s << ", ";
- const Field& field = fields_[i];
- s << field.name << ": " << field.type->ToString();
- }
- s << ">";
- return s.str();
-}
-
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/struct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h
index e575c31..5842534 100644
--- a/cpp/src/arrow/types/struct.h
+++ b/cpp/src/arrow/types/struct.h
@@ -18,33 +18,14 @@
#ifndef ARROW_TYPES_STRUCT_H
#define ARROW_TYPES_STRUCT_H
+#include <memory>
#include <string>
#include <vector>
-#include "arrow/field.h"
#include "arrow/type.h"
namespace arrow {
-struct StructType : public DataType {
- std::vector<Field> fields_;
-
- explicit StructType(const std::vector<Field>& fields, bool nullable = true)
- : DataType(LogicalType::STRUCT, nullable) {
- fields_ = fields;
- }
-
- const Field& field(int i) const {
- return fields_[i];
- }
-
- int num_children() const {
- return fields_.size();
- }
-
- virtual std::string ToString() const;
-};
-
} // namespace arrow
#endif // ARROW_TYPES_STRUCT_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index c53f307..4272ce4 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -19,22 +19,6 @@
# arrow_util
#######################################
-set(UTIL_SRCS
- bit-util.cc
- buffer.cc
- memory-pool.cc
- status.cc
-)
-
-set(UTIL_LIBS
-)
-
-add_library(arrow_util STATIC
- ${UTIL_SRCS}
-)
-target_link_libraries(arrow_util ${UTIL_LIBS})
-SET_TARGET_PROPERTIES(arrow_util PROPERTIES LINKER_LANGUAGE CXX)
-
# Headers: top level
install(FILES
bit-util.h
@@ -50,7 +34,7 @@ install(FILES
add_library(arrow_test_util)
target_link_libraries(arrow_test_util
- arrow_util)
+)
SET_TARGET_PROPERTIES(arrow_test_util PROPERTIES LINKER_LANGUAGE CXX)
@@ -64,7 +48,6 @@ add_library(arrow_test_main
if (APPLE)
target_link_libraries(arrow_test_main
gtest
- arrow_util
arrow_test_util
dl)
set_target_properties(arrow_test_main
@@ -72,7 +55,6 @@ if (APPLE)
else()
target_link_libraries(arrow_test_main
gtest
- arrow_util
arrow_test_util
pthread
dl
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/util/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc
index 3f3807d..50f4716 100644
--- a/cpp/src/arrow/util/buffer.cc
+++ b/cpp/src/arrow/util/buffer.cc
@@ -31,6 +31,8 @@ Buffer::Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset,
parent_ = parent;
}
+Buffer::~Buffer() {}
+
std::shared_ptr<Buffer> MutableBuffer::GetImmutableView() {
return std::make_shared<Buffer>(this->get_shared_ptr(), 0, size());
}
@@ -43,6 +45,12 @@ PoolBuffer::PoolBuffer(MemoryPool* pool) :
pool_ = pool;
}
+PoolBuffer::~PoolBuffer() {
+ if (mutable_data_ != nullptr) {
+ pool_->Free(mutable_data_, capacity_);
+ }
+}
+
Status PoolBuffer::Reserve(int64_t new_capacity) {
if (!mutable_data_ || new_capacity > capacity_) {
uint8_t* new_data;
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/util/buffer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.h b/cpp/src/arrow/util/buffer.h
index 8704723..0c3e210 100644
--- a/cpp/src/arrow/util/buffer.h
+++ b/cpp/src/arrow/util/buffer.h
@@ -39,6 +39,7 @@ class Buffer : public std::enable_shared_from_this<Buffer> {
Buffer(const uint8_t* data, int64_t size) :
data_(data),
size_(size) {}
+ virtual ~Buffer();
// An offset into data that is owned by another buffer, but we want to be
// able to retain a valid pointer to it even after other shared_ptr's to the
@@ -136,6 +137,7 @@ class ResizableBuffer : public MutableBuffer {
class PoolBuffer : public ResizableBuffer {
public:
explicit PoolBuffer(MemoryPool* pool = nullptr);
+ virtual ~PoolBuffer();
virtual Status Resize(int64_t new_size);
virtual Status Reserve(int64_t new_capacity);
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/util/status.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/util/status.cc
index c64b8a3..c6e113e 100644
--- a/cpp/src/arrow/util/status.cc
+++ b/cpp/src/arrow/util/status.cc
@@ -35,4 +35,44 @@ const char* Status::CopyState(const char* state) {
return result;
}
+std::string Status::CodeAsString() const {
+ if (state_ == NULL) {
+ return "OK";
+ }
+
+ const char* type;
+ switch (code()) {
+ case StatusCode::OK:
+ type = "OK";
+ break;
+ case StatusCode::OutOfMemory:
+ type = "Out of memory";
+ break;
+ case StatusCode::KeyError:
+ type = "Key error";
+ break;
+ case StatusCode::Invalid:
+ type = "Invalid";
+ break;
+ case StatusCode::NotImplemented:
+ type = "NotImplemented";
+ break;
+ }
+ return std::string(type);
+}
+
+std::string Status::ToString() const {
+ std::string result(CodeAsString());
+ if (state_ == NULL) {
+ return result;
+ }
+
+ result.append(": ");
+
+ uint32_t length;
+ memcpy(&length, state_, sizeof(length));
+ result.append(reinterpret_cast<const char*>(state_ + 7), length);
+ return result;
+}
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index df55bfa..8fdd829 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -45,6 +45,12 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
endif()
+find_program(CCACHE_FOUND ccache)
+if(CCACHE_FOUND)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+ set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+endif(CCACHE_FOUND)
+
############################################################
# Compiler flags
############################################################
@@ -389,7 +395,12 @@ add_subdirectory(src/pyarrow)
add_subdirectory(src/pyarrow/util)
set(PYARROW_SRCS
+ src/pyarrow/common.cc
+ src/pyarrow/helpers.cc
src/pyarrow/init.cc
+ src/pyarrow/status.cc
+
+ src/pyarrow/adapters/builtin.cc
)
set(LINK_LIBS
@@ -410,18 +421,16 @@ endif()
# Setup and build Cython modules
############################################################
-foreach(pyx_api_file
- arrow/config.pyx
- arrow/parquet.pyx)
- set_source_files_properties(${pyx_api_file} PROPERTIES CYTHON_API 1)
-endforeach(pyx_api_file)
-
set(USE_RELATIVE_RPATH ON)
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
set(CYTHON_EXTENSIONS
+ array
config
+ error
parquet
+ scalar
+ schema
)
foreach(module ${CYTHON_EXTENSIONS})
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/arrow/__init__.py b/python/arrow/__init__.py
index e69de29..3c049b8 100644
--- a/python/arrow/__init__.py
+++ b/python/arrow/__init__.py
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# flake8: noqa
+
+from arrow.array import (Array, from_pylist, total_allocated_bytes,
+ BooleanArray, NumericArray,
+ Int8Array, UInt8Array,
+ ListArray, StringArray)
+
+from arrow.error import ArrowException
+
+from arrow.scalar import ArrayValue, NA, Scalar
+
+from arrow.schema import (null, bool_,
+ int8, int16, int32, int64,
+ uint8, uint16, uint32, uint64,
+ float_, double, string,
+ list_, struct, field,
+ DataType, Field, Schema)
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/array.pxd b/python/arrow/array.pxd
new file mode 100644
index 0000000..e32d277
--- /dev/null
+++ b/python/arrow/array.pxd
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.includes.common cimport shared_ptr
+from arrow.includes.arrow cimport CArray, LogicalType
+
+from arrow.scalar import NA
+
+from arrow.schema cimport DataType
+
+cdef extern from "Python.h":
+ int PySlice_Check(object)
+
+cdef class Array:
+ cdef:
+ shared_ptr[CArray] sp_array
+ CArray* ap
+
+ cdef readonly:
+ DataType type
+
+ cdef init(self, const shared_ptr[CArray]& sp_array)
+ cdef _getitem(self, int i)
+
+
+cdef class BooleanArray(Array):
+ pass
+
+
+cdef class NumericArray(Array):
+ pass
+
+
+cdef class Int8Array(NumericArray):
+ pass
+
+
+cdef class UInt8Array(NumericArray):
+ pass
+
+
+cdef class Int16Array(NumericArray):
+ pass
+
+
+cdef class UInt16Array(NumericArray):
+ pass
+
+
+cdef class Int32Array(NumericArray):
+ pass
+
+
+cdef class UInt32Array(NumericArray):
+ pass
+
+
+cdef class Int64Array(NumericArray):
+ pass
+
+
+cdef class UInt64Array(NumericArray):
+ pass
+
+
+cdef class ListArray(Array):
+ pass
+
+
+cdef class StringArray(Array):
+ pass
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/array.pyx b/python/arrow/array.pyx
new file mode 100644
index 0000000..3a3210d
--- /dev/null
+++ b/python/arrow/array.pyx
@@ -0,0 +1,179 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from arrow.includes.arrow cimport *
+cimport arrow.includes.pyarrow as pyarrow
+
+from arrow.compat import frombytes, tobytes
+from arrow.error cimport check_status
+
+from arrow.scalar import NA
+
+def total_allocated_bytes():
+    """
+    Return the byte count reported by bytes_allocated() on the memory pool
+    obtained from pyarrow.GetMemoryPool()
+    """
+    return pyarrow.GetMemoryPool().bytes_allocated()
+
+
+cdef class Array:
+    """
+    Base class wrapping a shared_ptr to a C++ arrow::Array.
+
+    Instances are populated through the C-level init() method, not through
+    Python constructor arguments.
+    """
+
+    cdef init(self, const shared_ptr[CArray]& sp_array):
+        # Take shared ownership of the C++ array, cache the raw pointer and
+        # wrap the array's data type for access from Python
+        self.sp_array = sp_array
+        self.ap = sp_array.get()
+        self.type = DataType()
+        self.type.init(self.sp_array.get().type())
+
+    property null_count:
+
+        def __get__(self):
+            return self.sp_array.get().null_count()
+
+    def __len__(self):
+        return self.sp_array.get().length()
+
+    def isnull(self):
+        # NotImplemented is the comparison sentinel, not an exception class;
+        # raising it produces an unrelated TypeError
+        raise NotImplementedError
+
+    def __getitem__(self, key):
+        """
+        Return the element at an integer position, or a slice of the array
+
+        Negative integer positions count from the end. Null slots are
+        returned as NA.
+
+        Raises
+        ------
+        IndexError
+            If an integer key is outside [-len(self), len(self))
+        NotImplementedError
+            For slices (slicing is not implemented yet, and steps other
+            than 1 are rejected up front), and for non-null elements when
+            the subclass does not override _getitem
+        """
+        cdef:
+            Py_ssize_t n = len(self)
+            Py_ssize_t i
+
+        if PySlice_Check(key):
+            # slice.indices resolves None and negative bounds in one step
+            # and clamps them to the array, so it terminates for n == 0
+            # (repeatedly adding n to a negative bound would not)
+            start, stop, step = key.indices(n)
+            if step != 1:
+                raise NotImplementedError
+            return self.slice(start, stop)
+
+        i = key
+        if i < 0:
+            i += n
+
+        # Check bounds before touching the C++ array: IsNull performs no
+        # range check visible from here
+        if i < 0 or i >= n:
+            raise IndexError('index out of bounds')
+
+        if self.ap.IsNull(i):
+            return NA
+        else:
+            return self._getitem(i)
+
+    cdef _getitem(self, int i):
+        # Subclasses override this to box the non-null value at position i
+        raise NotImplementedError
+
+    def slice(self, start, end):
+        # Not implemented yet. Raise instead of silently returning None so
+        # that arr[a:b] fails loudly
+        raise NotImplementedError
+
+
+# Concrete Array subclasses. None of them overrides _getitem, so reading a
+# non-null element raises NotImplementedError from Array._getitem.
+cdef class NullArray(Array):
+ pass
+
+
+cdef class BooleanArray(Array):
+ pass
+
+
+# Common base for the integer and floating point array wrappers below
+cdef class NumericArray(Array):
+ pass
+
+
+cdef class Int8Array(NumericArray):
+ pass
+
+
+cdef class UInt8Array(NumericArray):
+ pass
+
+
+cdef class Int16Array(NumericArray):
+ pass
+
+
+cdef class UInt16Array(NumericArray):
+ pass
+
+
+cdef class Int32Array(NumericArray):
+ pass
+
+
+cdef class UInt32Array(NumericArray):
+ pass
+
+
+cdef class Int64Array(NumericArray):
+ pass
+
+
+cdef class UInt64Array(NumericArray):
+ pass
+
+
+cdef class FloatArray(NumericArray):
+ pass
+
+
+cdef class DoubleArray(NumericArray):
+ pass
+
+
+cdef class ListArray(Array):
+ pass
+
+
+cdef class StringArray(Array):
+ pass
+
+
+# Maps a LogicalType enum value to the Array subclass used to box arrays of
+# that type. Every array class defined in this module is registered, so that
+# boxing an array of any of these types does not fail with a KeyError.
+cdef dict _array_classes = {
+    LogicalType_NA: NullArray,
+    LogicalType_BOOL: BooleanArray,
+    LogicalType_UINT8: UInt8Array,
+    LogicalType_INT8: Int8Array,
+    LogicalType_UINT16: UInt16Array,
+    LogicalType_INT16: Int16Array,
+    LogicalType_UINT32: UInt32Array,
+    LogicalType_INT32: Int32Array,
+    LogicalType_UINT64: UInt64Array,
+    LogicalType_INT64: Int64Array,
+    LogicalType_FLOAT: FloatArray,
+    LogicalType_DOUBLE: DoubleArray,
+    LogicalType_LIST: ListArray,
+    LogicalType_STRING: StringArray,
+}
+
+cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
+    # Wrap a C++ array in the Array subclass registered for its logical type.
+    # Raises ValueError when the array pointer or its data type is NULL, and
+    # KeyError when the logical type has no entry in _array_classes.
+    cdef:
+        CArray* c_array = sp_array.get()
+        CDataType* c_type
+        Array boxed
+
+    if c_array == NULL:
+        raise ValueError('Array was NULL')
+
+    c_type = c_array.type().get()
+    if c_type == NULL:
+        raise ValueError('Array data type was NULL')
+
+    boxed = _array_classes[c_type.type]()
+    boxed.init(sp_array)
+    return boxed
+
+
+def from_pylist(object list_obj, type=None):
+    """
+    Convert Python list to Arrow array
+
+    Parameters
+    ----------
+    list_obj : object
+        Passed unchanged to pyarrow.ConvertPySequence
+    type : object, optional
+        Accepted but not used by this function
+
+    Returns
+    -------
+    The Array subclass instance produced by box_arrow_array
+
+    Raises
+    ------
+    ArrowException
+        If the conversion returns a non-OK status
+    """
+    cdef shared_ptr[CArray] converted
+
+    check_status(pyarrow.ConvertPySequence(list_obj, &converted))
+    return box_arrow_array(converted)
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/config.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/config.pyx b/python/arrow/config.pyx
index 8f10beb..521bc06 100644
--- a/python/arrow/config.pyx
+++ b/python/arrow/config.pyx
@@ -2,7 +2,7 @@
# distutils: language = c++
# cython: embedsignature = True
-cdef extern from 'pyarrow/init.h' namespace 'arrow::py':
+cdef extern from 'pyarrow/init.h' namespace 'pyarrow':
void pyarrow_init()
pyarrow_init()
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/error.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/error.pxd b/python/arrow/error.pxd
new file mode 100644
index 0000000..c18cb3e
--- /dev/null
+++ b/python/arrow/error.pxd
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.includes.pyarrow cimport *
+
+# Raises ArrowException when the status is not OK (implemented in error.pyx)
+cdef check_status(const Status& status)
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/error.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/error.pyx b/python/arrow/error.pyx
new file mode 100644
index 0000000..f1d5163
--- /dev/null
+++ b/python/arrow/error.pyx
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.includes.common cimport c_string
+
+from arrow.compat import frombytes
+
+# Exception raised by check_status for any non-OK Status
+class ArrowException(Exception):
+ pass
+
+cdef check_status(const Status& status):
+    # Do nothing for an OK status; otherwise raise ArrowException carrying
+    # the text returned by Status::ToString()
+    cdef c_string c_message
+
+    if not status.ok():
+        c_message = status.ToString()
+        raise ArrowException(frombytes(c_message))
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/includes/arrow.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/arrow.pxd b/python/arrow/includes/arrow.pxd
index 3635ceb..fde5de9 100644
--- a/python/arrow/includes/arrow.pxd
+++ b/python/arrow/includes/arrow.pxd
@@ -20,4 +20,77 @@
from arrow.includes.common cimport *
cdef extern from "arrow/api.h" namespace "arrow" nogil:
- pass
+
+ # Cython-side names for the members of the C++ enum
+ # arrow::LogicalType::type; each quoted string is the C++ name that the
+ # generated code uses for that member.
+ enum LogicalType" arrow::LogicalType::type":
+ LogicalType_NA" arrow::LogicalType::NA"
+
+ LogicalType_BOOL" arrow::LogicalType::BOOL"
+
+ LogicalType_UINT8" arrow::LogicalType::UINT8"
+ LogicalType_INT8" arrow::LogicalType::INT8"
+ LogicalType_UINT16" arrow::LogicalType::UINT16"
+ LogicalType_INT16" arrow::LogicalType::INT16"
+ LogicalType_UINT32" arrow::LogicalType::UINT32"
+ LogicalType_INT32" arrow::LogicalType::INT32"
+ LogicalType_UINT64" arrow::LogicalType::UINT64"
+ LogicalType_INT64" arrow::LogicalType::INT64"
+
+ LogicalType_FLOAT" arrow::LogicalType::FLOAT"
+ LogicalType_DOUBLE" arrow::LogicalType::DOUBLE"
+
+ LogicalType_STRING" arrow::LogicalType::STRING"
+
+ LogicalType_LIST" arrow::LogicalType::LIST"
+ LogicalType_STRUCT" arrow::LogicalType::STRUCT"
+
+ # Base C++ data type: exposes its logical type tag, nullability, an
+ # equality test against another data type and a string rendering.
+ cdef cppclass CDataType" arrow::DataType":
+ LogicalType type
+ c_bool nullable
+
+ c_bool Equals(const CDataType* other)
+
+ c_string ToString()
+
+ # Only the allocation counter is exposed to Cython
+ cdef cppclass MemoryPool" arrow::MemoryPool":
+ int64_t bytes_allocated()
+
+ # List type, constructed from the data type of its values
+ cdef cppclass CListType" arrow::ListType"(CDataType):
+ CListType(const shared_ptr[CDataType]& value_type,
+ c_bool nullable)
+
+ cdef cppclass CStringType" arrow::StringType"(CDataType):
+ pass
+
+ # Named field: a name paired with a data type
+ cdef cppclass CField" arrow::Field":
+ c_string name
+ shared_ptr[CDataType] type
+
+ CField(const c_string& name, const shared_ptr[CDataType]& type)
+
+ # Struct type, constructed from a vector of fields
+ cdef cppclass CStructType" arrow::StructType"(CDataType):
+ CStructType(const vector[shared_ptr[CField]]& fields,
+ c_bool nullable)
+
+ # NOTE(review): the argument is named `fields` but is declared as a single
+ # shared_ptr[CField], whereas CStructType above takes a vector of them --
+ # confirm against the C++ arrow::Schema constructor.
+ cdef cppclass CSchema" arrow::Schema":
+ CSchema(const shared_ptr[CField]& fields)
+
+ # Base C++ array: exposes its data type, length, null count, logical type
+ # and a per-slot null test.
+ cdef cppclass CArray" arrow::Array":
+ const shared_ptr[CDataType]& type()
+
+ int32_t length()
+ int32_t null_count()
+ LogicalType logical_type()
+
+ c_bool IsNull(int i)
+
+ # Concrete array types; no members beyond those of CArray are exposed yet
+ cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
+ pass
+
+ cdef cppclass CInt8Array" arrow::Int8Array"(CArray):
+ pass
+
+ cdef cppclass CListArray" arrow::ListArray"(CArray):
+ pass
+
+ # Declared as a subclass of the list array type
+ cdef cppclass CStringArray" arrow::StringArray"(CListArray):
+ pass
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/common.pxd b/python/arrow/includes/common.pxd
index f2fc826..839427a 100644
--- a/python/arrow/includes/common.pxd
+++ b/python/arrow/includes/common.pxd
@@ -19,7 +19,7 @@
from libc.stdint cimport *
from libcpp cimport bool as c_bool
-from libcpp.string cimport string
+from libcpp.string cimport string as c_string
from libcpp.vector cimport vector
# This must be included for cerr and other things to work
@@ -29,6 +29,8 @@ cdef extern from "<iostream>":
cdef extern from "<memory>" namespace "std" nogil:
cdef cppclass shared_ptr[T]:
+ shared_ptr()
+ shared_ptr(T*)
T* get()
void reset()
void reset(T* p)
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/pyarrow.pxd b/python/arrow/includes/pyarrow.pxd
index dcef663..3eed5b8 100644
--- a/python/arrow/includes/pyarrow.pxd
+++ b/python/arrow/includes/pyarrow.pxd
@@ -18,6 +18,28 @@
# distutils: language = c++
from arrow.includes.common cimport *
+from arrow.includes.arrow cimport (CArray, CDataType, LogicalType,
+ MemoryPool)
cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
- pass
+ # We can later add more of the common status factory methods as needed
+ cdef Status Status_OK "Status::OK"()
+
+ # Result object returned by the pyarrow C++ functions: ok() reports
+ # success, the Is* predicates test for one kind of failure each, and
+ # ToString() renders the status as text.
+ cdef cppclass Status:
+ Status()
+
+ c_string ToString()
+
+ c_bool ok()
+ c_bool IsOutOfMemory()
+ c_bool IsKeyError()
+ c_bool IsTypeError()
+ c_bool IsIOError()
+ c_bool IsValueError()
+ c_bool IsNotImplemented()
+ c_bool IsArrowError()
+
+ shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
+ # NOTE(review): takes a Python object but is declared inside a `nogil`
+ # extern block -- confirm it is only ever called with the GIL held.
+ Status ConvertPySequence(object obj, shared_ptr[CArray]* out)
+
+ MemoryPool* GetMemoryPool()
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/scalar.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/scalar.pxd b/python/arrow/scalar.pxd
new file mode 100644
index 0000000..e193c09
--- /dev/null
+++ b/python/arrow/scalar.pxd
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.includes.common cimport *
+from arrow.includes.arrow cimport CArray, CListArray
+
+from arrow.schema cimport DataType
+
+# Base declaration for scalar values; exposes a read-only DataType to Python
+cdef class Scalar:
+ cdef readonly:
+ DataType type
+
+
+# Scalar type of the NA object defined in scalar.pyx
+cdef class NAType(Scalar):
+ pass
+
+
+# Scalar declared as a shared array pointer plus an integer position
+cdef class ArrayValue(Scalar):
+ cdef:
+ shared_ptr[CArray] array
+ int index
+
+
+# Type-specific array values; no extra attributes are declared
+cdef class Int8Value(ArrayValue):
+ pass
+
+
+cdef class ListValue(ArrayValue):
+ pass
+
+
+cdef class StringValue(ArrayValue):
+ pass
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/scalar.pyx b/python/arrow/scalar.pyx
new file mode 100644
index 0000000..78dadec
--- /dev/null
+++ b/python/arrow/scalar.pyx
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import arrow.schema as schema
+
+# Scalar type for missing values; prints as 'NA'
+cdef class NAType(Scalar):
+
+ def __cinit__(self):
+ # schema.null() returns the primitive DataType for LogicalType_NA
+ self.type = schema.null()
+
+ def __repr__(self):
+ return 'NA'
+
+# Module-level instance; array.pyx imports it and returns it from
+# Array.__getitem__ for null slots
+NA = NAType()
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/schema.pxd b/python/arrow/schema.pxd
new file mode 100644
index 0000000..487c246
--- /dev/null
+++ b/python/arrow/schema.pxd
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.includes.common cimport shared_ptr
+from arrow.includes.arrow cimport CDataType, CField, CSchema
+
+# Wrapper around a C++ data type: the shared_ptr owns it and `type` is the
+# raw pointer that schema.pyx assigns from it in init()
+cdef class DataType:
+ cdef:
+ shared_ptr[CDataType] sp_type
+ CDataType* type
+
+ cdef init(self, const shared_ptr[CDataType]& type)
+
+# Wrapper around a C++ field; keeps the Python DataType readable from Python
+cdef class Field:
+ cdef:
+ shared_ptr[CField] sp_field
+ CField* field
+
+ cdef readonly:
+ DataType type
+
+# Wrapper around a C++ schema
+cdef class Schema:
+ cdef:
+ shared_ptr[CSchema] sp_schema
+ CSchema* schema
http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/schema.pyx b/python/arrow/schema.pyx
new file mode 100644
index 0000000..63cd6e8
--- /dev/null
+++ b/python/arrow/schema.pyx
@@ -0,0 +1,150 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+########################################
+# Data types, fields, schemas, and so forth
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from arrow.compat import frombytes, tobytes
+from arrow.includes.arrow cimport *
+cimport arrow.includes.pyarrow as pyarrow
+
+cimport cpython
+
+cdef class DataType:
+    """
+    Python wrapper around a shared_ptr to a C++ arrow::DataType.
+
+    Instances are created empty and populated through the C-level init()
+    method.
+    """
+
+    def __cinit__(self):
+        pass
+
+    cdef init(self, const shared_ptr[CDataType]& type):
+        # Keep the shared_ptr for ownership and cache the raw pointer
+        self.sp_type = type
+        self.type = type.get()
+
+    def __str__(self):
+        return frombytes(self.type.ToString())
+
+    def __repr__(self):
+        return 'DataType({0})'.format(str(self))
+
+    def __richcmp__(DataType self, other, int op):
+        """
+        Support == and != between data types
+
+        Comparing against an object that is not a DataType (including
+        None) returns NotImplemented, so Python falls back to its default
+        handling. Ordering comparisons raise TypeError.
+        """
+        if op != cpython.Py_EQ and op != cpython.Py_NE:
+            raise TypeError('Invalid comparison')
+
+        # `other` is left untyped on purpose: an argument typed as DataType
+        # also accepts None, and reading the C-level `type` attribute of
+        # None is an invalid memory access
+        if not isinstance(other, DataType):
+            return NotImplemented
+
+        equal = self.type.Equals((<DataType> other).type)
+        if op == cpython.Py_EQ:
+            return equal
+        return not equal
+
+
+cdef class Field:
+    """
+    Python wrapper around a C++ arrow::Field: a name paired with a DataType
+
+    Parameters
+    ----------
+    name : object
+        Field name, converted with tobytes()
+    type : DataType
+        Data type of the field; None is rejected with a TypeError
+    """
+
+    # `not None` matters: a DataType-typed argument otherwise accepts None,
+    # and reading type.sp_type from None is an invalid memory access
+    def __cinit__(self, object name, DataType type not None):
+        self.type = type
+        self.sp_field.reset(new CField(tobytes(name), type.sp_type))
+        self.field = self.sp_field.get()
+
+    def __repr__(self):
+        return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
+
+    property name:
+
+        def __get__(self):
+            return frombytes(self.field.name)
+
+# Primitive DataType wrappers created so far, keyed by
+# (logical type, nullable)
+cdef dict _type_cache = {}
+
+cdef DataType primitive_type(LogicalType type, bint nullable=True):
+    # Return the DataType wrapper for this logical type and nullability,
+    # creating and caching it on first request
+    cdef:
+        DataType result
+        tuple cache_key = (type, nullable)
+
+    try:
+        return _type_cache[cache_key]
+    except KeyError:
+        pass
+
+    result = DataType()
+    result.init(pyarrow.GetPrimitiveType(type, nullable))
+    _type_cache[cache_key] = result
+    return result
+
+#------------------------------------------------------------
+# Type factory functions
+
+def field(name, type):
+ """Create a Field from a name and a DataType"""
+ return Field(name, type)
+
+def null():
+ """Return the primitive DataType for LogicalType_NA"""
+ return primitive_type(LogicalType_NA)
+
+def bool_(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_BOOL"""
+ return primitive_type(LogicalType_BOOL, nullable)
+
+def uint8(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_UINT8"""
+ return primitive_type(LogicalType_UINT8, nullable)
+
+def int8(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_INT8"""
+ return primitive_type(LogicalType_INT8, nullable)
+
+def uint16(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_UINT16"""
+ return primitive_type(LogicalType_UINT16, nullable)
+
+def int16(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_INT16"""
+ return primitive_type(LogicalType_INT16, nullable)
+
+def uint32(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_UINT32"""
+ return primitive_type(LogicalType_UINT32, nullable)
+
+def int32(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_INT32"""
+ return primitive_type(LogicalType_INT32, nullable)
+
+def uint64(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_UINT64"""
+ return primitive_type(LogicalType_UINT64, nullable)
+
+def int64(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_INT64"""
+ return primitive_type(LogicalType_INT64, nullable)
+
+def float_(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_FLOAT"""
+ return primitive_type(LogicalType_FLOAT, nullable)
+
+def double(c_bool nullable=True):
+ """Return the primitive DataType for LogicalType_DOUBLE"""
+ return primitive_type(LogicalType_DOUBLE, nullable)
+
+def string(c_bool nullable=True):
+ """
+ UTF8 string
+ """
+ return primitive_type(LogicalType_STRING, nullable)
+
+# `not None` matters: a DataType-typed argument otherwise accepts None, and
+# reading value_type.sp_type from None is an invalid memory access
+def list_(DataType value_type not None, c_bool nullable=True):
+    """
+    Create a list DataType whose values have the given DataType
+
+    Raises TypeError if value_type is None or not a DataType.
+    """
+    cdef DataType out = DataType()
+    out.init(shared_ptr[CDataType](
+        new CListType(value_type.sp_type, nullable)))
+    return out
+
+def struct(fields, c_bool nullable=True):
+    """
+    Create a struct DataType from an iterable of Field objects
+
+    Raises TypeError if any element is None or not a Field.
+    """
+    cdef:
+        DataType out = DataType()
+        Field field
+        vector[shared_ptr[CField]] c_fields
+
+    for field in fields:
+        # Assignment to the typed loop variable rejects non-Field objects
+        # but lets None through; reading sp_field from None is an invalid
+        # memory access, so reject it explicitly
+        if field is None:
+            raise TypeError('struct fields must not be None')
+        c_fields.push_back(field.sp_field)
+
+    out.init(shared_ptr[CDataType](
+        new CStructType(c_fields, nullable)))
+    return out