You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/28 19:49:14 UTC
arrow git commit: ARROW-87: [C++] Add all four possible ways to encode Decimals in Parquet to schema conversion
Repository: arrow
Updated Branches:
refs/heads/master 38897ee29 -> 2d8627cd8
ARROW-87: [C++] Add all four possible ways to encode Decimals in Parquet to schema conversion
See also: https://github.com/Parquet/parquet-format/blob/master/LogicalTypes.md#decimal
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #48 from xhochy/arrow-87 and squashes the following commits:
05ca3be [Uwe L. Korn] Use parquet:: namespace instead of parquet_cpp
6bafc5f [Uwe L. Korn] ARROW-87: [C++] Add all four possible ways to encode Decimals in Parquet to schema conversion
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2d8627cd
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2d8627cd
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2d8627cd
Branch: refs/heads/master
Commit: 2d8627cd81f83783b0ceb01d137a46b581ecba26
Parents: 38897ee
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Mon Mar 28 10:49:08 2016 -0700
Committer: Wes McKinney <we...@apache.org>
Committed: Mon Mar 28 10:49:08 2016 -0700
----------------------------------------------------------------------
cpp/src/arrow/parquet/parquet-schema-test.cc | 36 +++++++++++++++++++++++
cpp/src/arrow/parquet/schema.cc | 9 ++++++
2 files changed, 45 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8627cd/cpp/src/arrow/parquet/parquet-schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/parquet/parquet-schema-test.cc b/cpp/src/arrow/parquet/parquet-schema-test.cc
index 02a8caf..a289ddb 100644
--- a/cpp/src/arrow/parquet/parquet-schema-test.cc
+++ b/cpp/src/arrow/parquet/parquet-schema-test.cc
@@ -22,6 +22,7 @@
#include "arrow/test-util.h"
#include "arrow/type.h"
+#include "arrow/types/decimal.h"
#include "arrow/util/status.h"
#include "arrow/parquet/schema.h"
@@ -46,6 +47,7 @@ const auto DOUBLE = std::make_shared<DoubleType>();
const auto UTF8 = std::make_shared<StringType>();
const auto BINARY = std::make_shared<ListType>(
std::make_shared<Field>("", UINT8));
+const auto DECIMAL_8_4 = std::make_shared<DecimalType>(8, 4);
class TestConvertParquetSchema : public ::testing::Test {
public:
@@ -119,6 +121,40 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
CheckFlatSchema(arrow_schema);
}
+TEST_F(TestConvertParquetSchema, ParquetFlatDecimals) {
+ std::vector<NodePtr> parquet_fields;
+ std::vector<std::shared_ptr<Field>> arrow_fields;
+
+ parquet_fields.push_back(
+ PrimitiveNode::Make("flba-decimal", Repetition::OPTIONAL,
+ ParquetType::FIXED_LEN_BYTE_ARRAY,
+ LogicalType::DECIMAL, 4, 8, 4));
+ arrow_fields.push_back(std::make_shared<Field>("flba-decimal", DECIMAL_8_4));
+
+ parquet_fields.push_back(
+ PrimitiveNode::Make("binary-decimal", Repetition::OPTIONAL,
+ ParquetType::BYTE_ARRAY,
+ LogicalType::DECIMAL, -1, 8, 4));
+ arrow_fields.push_back(std::make_shared<Field>("binary-decimal", DECIMAL_8_4));
+
+ parquet_fields.push_back(
+ PrimitiveNode::Make("int32-decimal", Repetition::OPTIONAL,
+ ParquetType::INT32,
+ LogicalType::DECIMAL, -1, 8, 4));
+ arrow_fields.push_back(std::make_shared<Field>("int32-decimal", DECIMAL_8_4));
+
+ parquet_fields.push_back(
+ PrimitiveNode::Make("int64-decimal", Repetition::OPTIONAL,
+ ParquetType::INT64,
+ LogicalType::DECIMAL, -1, 8, 4));
+ arrow_fields.push_back(std::make_shared<Field>("int64-decimal", DECIMAL_8_4));
+
+ auto arrow_schema = std::make_shared<Schema>(arrow_fields);
+ ASSERT_OK(ConvertSchema(parquet_fields));
+
+ CheckFlatSchema(arrow_schema);
+}
+
TEST_F(TestConvertParquetSchema, UnsupportedThings) {
std::vector<NodePtr> unsupported_nodes;
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8627cd/cpp/src/arrow/parquet/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/parquet/schema.cc b/cpp/src/arrow/parquet/schema.cc
index d8eb2ad..14f4f5b 100644
--- a/cpp/src/arrow/parquet/schema.cc
+++ b/cpp/src/arrow/parquet/schema.cc
@@ -57,6 +57,9 @@ static Status FromByteArray(const PrimitiveNode* node, TypePtr* out) {
case LogicalType::UTF8:
*out = UTF8;
break;
+ case LogicalType::DECIMAL:
+ *out = MakeDecimalType(node);
+ break;
default:
// BINARY
*out = BINARY;
@@ -86,6 +89,9 @@ static Status FromInt32(const PrimitiveNode* node, TypePtr* out) {
case LogicalType::NONE:
*out = INT32;
break;
+ case LogicalType::DECIMAL:
+ *out = MakeDecimalType(node);
+ break;
default:
return Status::NotImplemented("Unhandled logical type for int32");
break;
@@ -98,6 +104,9 @@ static Status FromInt64(const PrimitiveNode* node, TypePtr* out) {
case LogicalType::NONE:
*out = INT64;
break;
+ case LogicalType::DECIMAL:
+ *out = MakeDecimalType(node);
+ break;
default:
return Status::NotImplemented("Unhandled logical type for int64");
break;