You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/28 19:49:14 UTC

arrow git commit: ARROW-87: [C++] Add all four possible ways to encode Decimals in Parquet to schema conversion

Repository: arrow
Updated Branches:
  refs/heads/master 38897ee29 -> 2d8627cd8


ARROW-87: [C++] Add all four possible ways to encode Decimals in Parquet to schema conversion

See also: https://github.com/Parquet/parquet-format/blob/master/LogicalTypes.md#decimal

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #48 from xhochy/arrow-87 and squashes the following commits:

05ca3be [Uwe L. Korn] Use parquet:: namespace instead of parquet_cpp
6bafc5f [Uwe L. Korn] ARROW-87: [C++] Add all four possible ways to encode Decimals in Parquet to schema conversion


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2d8627cd
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2d8627cd
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2d8627cd

Branch: refs/heads/master
Commit: 2d8627cd81f83783b0ceb01d137a46b581ecba26
Parents: 38897ee
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Mon Mar 28 10:49:08 2016 -0700
Committer: Wes McKinney <we...@apache.org>
Committed: Mon Mar 28 10:49:08 2016 -0700

----------------------------------------------------------------------
 cpp/src/arrow/parquet/parquet-schema-test.cc | 36 +++++++++++++++++++++++
 cpp/src/arrow/parquet/schema.cc              |  9 ++++++
 2 files changed, 45 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8627cd/cpp/src/arrow/parquet/parquet-schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/parquet/parquet-schema-test.cc b/cpp/src/arrow/parquet/parquet-schema-test.cc
index 02a8caf..a289ddb 100644
--- a/cpp/src/arrow/parquet/parquet-schema-test.cc
+++ b/cpp/src/arrow/parquet/parquet-schema-test.cc
@@ -22,6 +22,7 @@
 
 #include "arrow/test-util.h"
 #include "arrow/type.h"
+#include "arrow/types/decimal.h"
 #include "arrow/util/status.h"
 
 #include "arrow/parquet/schema.h"
@@ -46,6 +47,7 @@ const auto DOUBLE = std::make_shared<DoubleType>();
 const auto UTF8 = std::make_shared<StringType>();
 const auto BINARY = std::make_shared<ListType>(
     std::make_shared<Field>("", UINT8));
+const auto DECIMAL_8_4 = std::make_shared<DecimalType>(8, 4);
 
 class TestConvertParquetSchema : public ::testing::Test {
  public:
@@ -119,6 +121,40 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
   CheckFlatSchema(arrow_schema);
 }
 
+TEST_F(TestConvertParquetSchema, ParquetFlatDecimals) {  // DECIMAL-annotated columns of four Parquet physical types must convert to the same Arrow decimal type.
+  std::vector<NodePtr> parquet_fields;  // Parquet nodes handed to ConvertSchema
+  std::vector<std::shared_ptr<Field>> arrow_fields;  // expected Arrow fields, in the same order
+  // Case 1: FIXED_LEN_BYTE_ARRAY. Trailing args (4, 8, 4) are presumably length, precision, scale -- TODO confirm against PrimitiveNode::Make.
+  parquet_fields.push_back(
+      PrimitiveNode::Make("flba-decimal", Repetition::OPTIONAL,
+          ParquetType::FIXED_LEN_BYTE_ARRAY,
+          LogicalType::DECIMAL, 4, 8, 4));
+  arrow_fields.push_back(std::make_shared<Field>("flba-decimal", DECIMAL_8_4));
+  // Case 2: BYTE_ARRAY. The -1 presumably means "no fixed length" -- TODO confirm.
+  parquet_fields.push_back(
+      PrimitiveNode::Make("binary-decimal", Repetition::OPTIONAL,
+          ParquetType::BYTE_ARRAY,
+          LogicalType::DECIMAL, -1, 8, 4));
+  arrow_fields.push_back(std::make_shared<Field>("binary-decimal", DECIMAL_8_4));
+  // Case 3: INT32 annotated as DECIMAL.
+  parquet_fields.push_back(
+      PrimitiveNode::Make("int32-decimal", Repetition::OPTIONAL,
+          ParquetType::INT32,
+          LogicalType::DECIMAL, -1, 8, 4));
+  arrow_fields.push_back(std::make_shared<Field>("int32-decimal", DECIMAL_8_4));
+  // Case 4: INT64 annotated as DECIMAL.
+  parquet_fields.push_back(
+      PrimitiveNode::Make("int64-decimal", Repetition::OPTIONAL,
+          ParquetType::INT64,
+          LogicalType::DECIMAL, -1, 8, 4));
+  arrow_fields.push_back(std::make_shared<Field>("int64-decimal", DECIMAL_8_4));
+  // Build the expected schema (every field typed DECIMAL_8_4) and require the conversion to succeed.
+  auto arrow_schema = std::make_shared<Schema>(arrow_fields);
+  ASSERT_OK(ConvertSchema(parquet_fields));
+  // Fixture helper not shown here; presumably compares the converted schema with arrow_schema -- verify.
+  CheckFlatSchema(arrow_schema);
+}
+
 TEST_F(TestConvertParquetSchema, UnsupportedThings) {
   std::vector<NodePtr> unsupported_nodes;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/2d8627cd/cpp/src/arrow/parquet/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/parquet/schema.cc b/cpp/src/arrow/parquet/schema.cc
index d8eb2ad..14f4f5b 100644
--- a/cpp/src/arrow/parquet/schema.cc
+++ b/cpp/src/arrow/parquet/schema.cc
@@ -57,6 +57,9 @@ static Status FromByteArray(const PrimitiveNode* node, TypePtr* out) {
     case LogicalType::UTF8:
       *out = UTF8;
       break;
+    case LogicalType::DECIMAL:
+      *out = MakeDecimalType(node);
+      break;
     default:
       // BINARY
       *out = BINARY;
@@ -86,6 +89,9 @@ static Status FromInt32(const PrimitiveNode* node, TypePtr* out) {
     case LogicalType::NONE:
       *out = INT32;
       break;
+    case LogicalType::DECIMAL:
+      *out = MakeDecimalType(node);
+      break;
     default:
       return Status::NotImplemented("Unhandled logical type for int32");
       break;
@@ -98,6 +104,9 @@ static Status FromInt64(const PrimitiveNode* node, TypePtr* out) {
     case LogicalType::NONE:
       *out = INT64;
       break;
+    case LogicalType::DECIMAL:
+      *out = MakeDecimalType(node);
+      break;
     default:
       return Status::NotImplemented("Unhandled logical type for int64");
       break;