You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/09/15 13:31:25 UTC

[GitHub] [arrow] bkietz commented on a change in pull request #8160: ARROW-7302: [C++] CSV: allow dictionary types in explicit column types

bkietz commented on a change in pull request #8160:
URL: https://github.com/apache/arrow/pull/8160#discussion_r488661155



##########
File path: cpp/src/arrow/csv/converter.cc
##########
@@ -601,18 +631,30 @@ Result<std::shared_ptr<DictionaryConverter>> DictionaryConverter::Make(
   DictionaryConverter* ptr;
 
   switch (type->id()) {
-#define CONVERTER_CASE(TYPE_ID, CONVERTER_TYPE)    \
-  case TYPE_ID:                                    \
-    ptr = new CONVERTER_TYPE(type, options, pool); \
+#define CONVERTER_CASE(TYPE_ID, TYPE, VALUE_DECODER_TYPE)                              \
+  case TYPE_ID:                                                                        \
+    ptr = new TypedDictionaryConverter<TYPE, VALUE_DECODER_TYPE>(type, options, pool); \
     break;
 
-    CONVERTER_CASE(Type::BINARY, (DictionaryBinaryConverter<BinaryType, false>))
+    // XXX Are 32-bit types useful?
+    CONVERTER_CASE(Type::INT32, Int32Type, NumericValueDecoder<Int32Type>)
+    CONVERTER_CASE(Type::INT64, Int64Type, NumericValueDecoder<Int64Type>)
+    CONVERTER_CASE(Type::UINT32, UInt32Type, NumericValueDecoder<UInt32Type>)
+    CONVERTER_CASE(Type::UINT64, UInt64Type, NumericValueDecoder<UInt64Type>)
+    CONVERTER_CASE(Type::FLOAT, FloatType, NumericValueDecoder<FloatType>)
+    CONVERTER_CASE(Type::DOUBLE, DoubleType, NumericValueDecoder<DoubleType>)
+    CONVERTER_CASE(Type::DECIMAL, Decimal128Type, DecimalValueDecoder)
+    CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryType,
+                   FixedSizeBinaryValueDecoder)
+    CONVERTER_CASE(Type::BINARY, BinaryType, BinaryValueDecoder<false>)
 

Review comment:
       nit:
   ```suggestion
   #undef CONVERTER_CASE
   ```

##########
File path: cpp/src/arrow/csv/converter.cc
##########
@@ -601,18 +631,30 @@ Result<std::shared_ptr<DictionaryConverter>> DictionaryConverter::Make(
   DictionaryConverter* ptr;
 
   switch (type->id()) {
-#define CONVERTER_CASE(TYPE_ID, CONVERTER_TYPE)    \
-  case TYPE_ID:                                    \
-    ptr = new CONVERTER_TYPE(type, options, pool); \
+#define CONVERTER_CASE(TYPE_ID, TYPE, VALUE_DECODER_TYPE)                              \
+  case TYPE_ID:                                                                        \
+    ptr = new TypedDictionaryConverter<TYPE, VALUE_DECODER_TYPE>(type, options, pool); \
     break;
 
-    CONVERTER_CASE(Type::BINARY, (DictionaryBinaryConverter<BinaryType, false>))
+    // XXX Are 32-bit types useful?
+    CONVERTER_CASE(Type::INT32, Int32Type, NumericValueDecoder<Int32Type>)
+    CONVERTER_CASE(Type::INT64, Int64Type, NumericValueDecoder<Int64Type>)
+    CONVERTER_CASE(Type::UINT32, UInt32Type, NumericValueDecoder<UInt32Type>)
+    CONVERTER_CASE(Type::UINT64, UInt64Type, NumericValueDecoder<UInt64Type>)
+    CONVERTER_CASE(Type::FLOAT, FloatType, NumericValueDecoder<FloatType>)
+    CONVERTER_CASE(Type::DOUBLE, DoubleType, NumericValueDecoder<DoubleType>)
+    CONVERTER_CASE(Type::DECIMAL, Decimal128Type, DecimalValueDecoder)
+    CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryType,
+                   FixedSizeBinaryValueDecoder)
+    CONVERTER_CASE(Type::BINARY, BinaryType, BinaryValueDecoder<false>)

Review comment:
       (feel free to defer this to a follow-up)

##########
File path: cpp/src/arrow/csv/converter.cc
##########
@@ -601,18 +631,30 @@ Result<std::shared_ptr<DictionaryConverter>> DictionaryConverter::Make(
   DictionaryConverter* ptr;
 
   switch (type->id()) {
-#define CONVERTER_CASE(TYPE_ID, CONVERTER_TYPE)    \
-  case TYPE_ID:                                    \
-    ptr = new CONVERTER_TYPE(type, options, pool); \
+#define CONVERTER_CASE(TYPE_ID, TYPE, VALUE_DECODER_TYPE)                              \
+  case TYPE_ID:                                                                        \
+    ptr = new TypedDictionaryConverter<TYPE, VALUE_DECODER_TYPE>(type, options, pool); \
     break;
 
-    CONVERTER_CASE(Type::BINARY, (DictionaryBinaryConverter<BinaryType, false>))
+    // XXX Are 32-bit types useful?
+    CONVERTER_CASE(Type::INT32, Int32Type, NumericValueDecoder<Int32Type>)
+    CONVERTER_CASE(Type::INT64, Int64Type, NumericValueDecoder<Int64Type>)
+    CONVERTER_CASE(Type::UINT32, UInt32Type, NumericValueDecoder<UInt32Type>)
+    CONVERTER_CASE(Type::UINT64, UInt64Type, NumericValueDecoder<UInt64Type>)
+    CONVERTER_CASE(Type::FLOAT, FloatType, NumericValueDecoder<FloatType>)
+    CONVERTER_CASE(Type::DOUBLE, DoubleType, NumericValueDecoder<DoubleType>)
+    CONVERTER_CASE(Type::DECIMAL, Decimal128Type, DecimalValueDecoder)
+    CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryType,
+                   FixedSizeBinaryValueDecoder)
+    CONVERTER_CASE(Type::BINARY, BinaryType, BinaryValueDecoder<false>)
 
     case Type::STRING:
       if (options.check_utf8) {
-        ptr = new DictionaryBinaryConverter<StringType, true>(type, options, pool);
+        ptr = new TypedDictionaryConverter<StringType, BinaryValueDecoder<true>>(
+            type, options, pool);
       } else {
-        ptr = new DictionaryBinaryConverter<StringType, false>(type, options, pool);
+        ptr = new TypedDictionaryConverter<StringType, BinaryValueDecoder<false>>(
+            type, options, pool);
       }
       break;

Review comment:
       ```suggestion
         break;
   
       case Type::LARGE_STRING:
         if (options.check_utf8) {
           ptr = new TypedDictionaryConverter<LargeStringType, BinaryValueDecoder<true>>(
               type, options, pool);
         } else {
           ptr = new TypedDictionaryConverter<LargeStringType, BinaryValueDecoder<false>>(
               type, options, pool);
         }
         break; 
   ```

##########
File path: cpp/src/arrow/csv/converter.cc
##########
@@ -601,18 +631,30 @@ Result<std::shared_ptr<DictionaryConverter>> DictionaryConverter::Make(
   DictionaryConverter* ptr;

Review comment:
       Not really part of this patch, but: instead of using a raw pointer that is later wrapped in a shared pointer, please declare this as a smart pointer from the start so that the allocation is always owned by a stack-bound object.

##########
File path: cpp/src/arrow/csv/converter.cc
##########
@@ -601,18 +631,30 @@ Result<std::shared_ptr<DictionaryConverter>> DictionaryConverter::Make(
   DictionaryConverter* ptr;
 
   switch (type->id()) {
-#define CONVERTER_CASE(TYPE_ID, CONVERTER_TYPE)    \
-  case TYPE_ID:                                    \
-    ptr = new CONVERTER_TYPE(type, options, pool); \
+#define CONVERTER_CASE(TYPE_ID, TYPE, VALUE_DECODER_TYPE)                              \
+  case TYPE_ID:                                                                        \
+    ptr = new TypedDictionaryConverter<TYPE, VALUE_DECODER_TYPE>(type, options, pool); \
     break;
 
-    CONVERTER_CASE(Type::BINARY, (DictionaryBinaryConverter<BinaryType, false>))
+    // XXX Are 32-bit types useful?
+    CONVERTER_CASE(Type::INT32, Int32Type, NumericValueDecoder<Int32Type>)
+    CONVERTER_CASE(Type::INT64, Int64Type, NumericValueDecoder<Int64Type>)
+    CONVERTER_CASE(Type::UINT32, UInt32Type, NumericValueDecoder<UInt32Type>)
+    CONVERTER_CASE(Type::UINT64, UInt64Type, NumericValueDecoder<UInt64Type>)
+    CONVERTER_CASE(Type::FLOAT, FloatType, NumericValueDecoder<FloatType>)
+    CONVERTER_CASE(Type::DOUBLE, DoubleType, NumericValueDecoder<DoubleType>)
+    CONVERTER_CASE(Type::DECIMAL, Decimal128Type, DecimalValueDecoder)
+    CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryType,
+                   FixedSizeBinaryValueDecoder)
+    CONVERTER_CASE(Type::BINARY, BinaryType, BinaryValueDecoder<false>)

Review comment:
       IIUC, this should all work for the large_* types as well:
   ```suggestion
       CONVERTER_CASE(Type::LARGE_BINARY, LargeBinaryType, BinaryValueDecoder<false>)
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org