You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by mc...@apache.org on 2020/07/15 17:37:41 UTC

[incubator-pinot] branch hotfix-orc created (now e1ae656)

This is an automated email from the ASF dual-hosted git repository.

mcvsubbu pushed a change to branch hotfix-orc
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git.


      at e1ae656  Fix ORC Record reader to ignore extra fields (#5645)

This branch includes the following new commits:

     new e1ae656  Fix ORC Record reader to ignore extra fields (#5645)

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  Any revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 01/01: Fix ORC Record reader to ignore extra fields (#5645)

Posted by mc...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mcvsubbu pushed a commit to branch hotfix-orc
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit e1ae65682f7a54c2aac5f01be773691918452f8e
Author: Subbu Subramaniam <mc...@users.noreply.github.com>
AuthorDate: Mon Jul 13 15:36:15 2020 -0700

    Fix ORC Record reader to ignore extra fields (#5645)
    
    * Fix ORC Record reader to ignore extra fields
    
    Fixing an issue introduced in PR #5267.
    We should not be validating the type of fields that we don't care about.
    Cleaned up the messages and exceptions thrown so that we know which
    field is the problematic one.
    
    * Update pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
    
    Co-authored-by: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
    
    Co-authored-by: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
---
 .../plugin/inputformat/orc/ORCRecordReader.java    | 24 +++++++++++-----------
 .../inputformat/orc/ORCRecordReaderTest.java       |  2 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java b/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
index dbc3f68..b792c57 100644
--- a/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
+++ b/pinot-plugins/pinot-input-format/pinot-orc/src/main/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReader.java
@@ -97,8 +97,8 @@ public class ORCRecordReader implements RecordReader {
         if (category == TypeDescription.Category.LIST) {
           // Multi-value field
           TypeDescription.Category childCategory = fieldType.getChildren().get(0).getCategory();
-          Preconditions.checkState(isSupportedSingleValueType(childCategory), "Illegal multi-value field type: %s",
-              childCategory);
+          Preconditions.checkState(isSupportedSingleValueType(childCategory), "Illegal multi-value field type: %s (field %s)",
+              childCategory, field);
           // NOTE: LIST is stored as 2 vectors
           int fieldId = fieldType.getId();
           orcReaderInclude[fieldId] = true;
@@ -108,10 +108,10 @@ public class ORCRecordReader implements RecordReader {
           List<TypeDescription> children = fieldType.getChildren();
           TypeDescription.Category keyCategory = children.get(0).getCategory();
           Preconditions
-              .checkState(isSupportedSingleValueType(keyCategory), "Illegal map key field type: %s", keyCategory);
+              .checkState(isSupportedSingleValueType(keyCategory), "Illegal map key field type: %s (field %s)", keyCategory, field);
           TypeDescription.Category valueCategory = children.get(1).getCategory();
           Preconditions
-              .checkState(isSupportedSingleValueType(valueCategory), "Illegal map value field type: %s", valueCategory);
+              .checkState(isSupportedSingleValueType(valueCategory), "Illegal map value field type: %s (field %s)", valueCategory, field);
           // NOTE: MAP is stored as 3 vectors
           int fieldId = fieldType.getId();
           orcReaderInclude[fieldId] = true;
@@ -120,11 +120,11 @@ public class ORCRecordReader implements RecordReader {
         } else {
           // Single-value field
           Preconditions
-              .checkState(isSupportedSingleValueType(category), "Illegal single-value field type: %s", category);
+              .checkState(isSupportedSingleValueType(category), "Illegal single-value field type: %s (field %s)", category, field);
           orcReaderInclude[fieldType.getId()] = true;
         }
+        _includeOrcFields[i] = true;
       }
-      _includeOrcFields[i] = true;
     }
 
     _orcRecordReader = orcReader.rows(new Reader.Options().include(orcReaderInclude));
@@ -186,7 +186,7 @@ public class ORCRecordReader implements RecordReader {
           int length = (int) listColumnVector.lengths[rowId];
           List<Object> values = new ArrayList<>(length);
           for (int j = 0; j < length; j++) {
-            Object value = extractSingleValue(listColumnVector.child, offset + j, childCategory);
+            Object value = extractSingleValue(field, listColumnVector.child, offset + j, childCategory);
             // NOTE: Only keep non-null values
             // TODO: Revisit
             if (value != null) {
@@ -216,8 +216,8 @@ public class ORCRecordReader implements RecordReader {
           Map<Object, Object> map = new HashMap<>();
           for (int j = 0; j < length; j++) {
             int childRowId = offset + j;
-            Object key = extractSingleValue(mapColumnVector.keys, childRowId, keyCategory);
-            Object value = extractSingleValue(mapColumnVector.values, childRowId, valueCategory);
+            Object key = extractSingleValue(field, mapColumnVector.keys, childRowId, keyCategory);
+            Object value = extractSingleValue(field, mapColumnVector.values, childRowId, valueCategory);
             map.put(key, value);
           }
           reuse.putValue(field, map);
@@ -226,7 +226,7 @@ public class ORCRecordReader implements RecordReader {
         }
       } else {
         // Single-value field
-        reuse.putValue(field, extractSingleValue(_rowBatch.cols[i], _nextRowId, category));
+        reuse.putValue(field, extractSingleValue(field, _rowBatch.cols[i], _nextRowId, category));
       }
     }
 
@@ -238,7 +238,7 @@ public class ORCRecordReader implements RecordReader {
   }
 
   @Nullable
-  private static Object extractSingleValue(ColumnVector columnVector, int rowId, TypeDescription.Category category) {
+  private static Object extractSingleValue(String field, ColumnVector columnVector, int rowId, TypeDescription.Category category) {
     if (columnVector.isRepeating) {
       rowId = 0;
     }
@@ -324,7 +324,7 @@ public class ORCRecordReader implements RecordReader {
         }
       default:
         // Unsupported types
-        throw new IllegalStateException("Unsupported field type: " + category);
+        throw new IllegalStateException("Unsupported field type: " + category + " for field: " + field);
     }
   }
 
diff --git a/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReaderTest.java b/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReaderTest.java
index 5691fe2..69c5966 100644
--- a/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReaderTest.java
+++ b/pinot-plugins/pinot-input-format/pinot-orc/src/test/java/org/apache/pinot/plugin/inputformat/orc/ORCRecordReaderTest.java
@@ -52,7 +52,7 @@ public class ORCRecordReaderTest extends AbstractRecordReaderTest {
   protected void writeRecordsToFile(List<Map<String, Object>> recordsToWrite)
       throws Exception {
     TypeDescription schema = TypeDescription.fromString(
-        "struct<dim_sv_int:int,dim_sv_long:bigint,dim_sv_float:float,dim_sv_double:double,dim_sv_string:string,dim_mv_int:array<int>,dim_mv_long:array<bigint>,dim_mv_float:array<float>,dim_mv_double:array<double>,dim_mv_string:array<string>,met_int:int,met_long:bigint,met_float:float,met_double:double>");
+        "struct<dim_sv_int:int,dim_sv_long:bigint,dim_sv_float:float,dim_sv_double:double,dim_sv_string:string,dim_mv_int:array<int>,dim_mv_long:array<bigint>,dim_mv_float:array<float>,dim_mv_double:array<double>,dim_mv_string:array<string>,met_int:int,met_long:bigint,met_float:float,met_double:double,extra_field:struct<f1:int,f2:int>>");
     Writer writer = OrcFile.createWriter(new Path(_dataFile.getAbsolutePath()),
         OrcFile.writerOptions(new Configuration()).setSchema(schema));
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org