You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by dz...@apache.org on 2022/02/03 12:52:33 UTC

[drill] branch master updated: DRILL-8124: Fix implicit file issue with EVF 2 (#2451)

This is an automated email from the ASF dual-hosted git repository.

dzamo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 47acf50  DRILL-8124: Fix implicit file issue with EVF 2 (#2451)
47acf50 is described below

commit 47acf504bf8c3052b32b6764d78f5767ee7c12f6
Author: Paul Rogers <pa...@users.noreply.github.com>
AuthorDate: Thu Feb 3 04:52:27 2022 -0800

    DRILL-8124: Fix implicit file issue with EVF 2 (#2451)
---
 .../impl/scan/v3/file/ImplicitColumnMarker.java    | 27 ++++++++++++++++++----
 .../impl/scan/v3/file/ImplicitColumnResolver.java  |  7 +++++-
 .../impl/scan/v3/lifecycle/OutputBatchBuilder.java | 14 ++++++++++-
 .../impl/scan/v3/schema/AbstractSchemaTracker.java |  1 +
 .../impl/scan/v3/schema/MutableTupleSchema.java    |  1 +
 .../validate/IteratorValidatorBatchIterator.java   |  2 +-
 .../apache/drill/exec/record/VectorContainer.java  |  2 +-
 7 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/file/ImplicitColumnMarker.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/file/ImplicitColumnMarker.java
index e8c9790..981432a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/file/ImplicitColumnMarker.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/file/ImplicitColumnMarker.java
@@ -24,15 +24,23 @@ import org.apache.drill.exec.store.ColumnExplorer.ImplicitInternalFileColumns;
  * Marks a column as implicit and provides a function to resolve an
  * implicit column given a description of the input file.
  */
-public interface ImplicitColumnMarker {
-  String resolve(FileDescrip fileInfo);
+public abstract class ImplicitColumnMarker {
+  private int index = -1;
+
+  public void setIndex(int index) {
+    this.index = index;
+  }
+
+  public int index() { return index; }
+
+  public abstract String resolve(FileDescrip fileInfo);
 
   /**
    * Marker for a file-based, non-internal implicit column that
    * extracts parts of the file name as defined by the implicit
    * column definition.
    */
-  public class FileImplicitMarker implements ImplicitColumnMarker {
+  public static class FileImplicitMarker extends ImplicitColumnMarker {
     public final ImplicitFileColumns defn;
 
     public FileImplicitMarker(ImplicitFileColumns defn) {
@@ -43,6 +51,9 @@ public interface ImplicitColumnMarker {
     public String resolve(FileDescrip fileInfo) {
       return defn.getValue(fileInfo.filePath());
     }
+
+    @Override
+    public String toString() { return defn.name(); }
   }
 
   /**
@@ -50,7 +61,7 @@ public interface ImplicitColumnMarker {
    * root folder. Partitions that reference non-existent directory levels
    * are null.
    */
-  public class PartitionColumnMarker implements ImplicitColumnMarker {
+  public static class PartitionColumnMarker extends ImplicitColumnMarker {
     private final int partition;
 
     public PartitionColumnMarker(int partition) {
@@ -61,9 +72,12 @@ public interface ImplicitColumnMarker {
     public String resolve(FileDescrip fileInfo) {
       return fileInfo.partition(partition);
     }
+
+    @Override
+    public String toString() { return "dir" + partition; }
   }
 
-  public class InternalColumnMarker implements ImplicitColumnMarker {
+  public static class InternalColumnMarker extends ImplicitColumnMarker {
     public final ImplicitInternalFileColumns defn;
 
     public InternalColumnMarker(ImplicitInternalFileColumns defn) {
@@ -95,5 +109,8 @@ public interface ImplicitColumnMarker {
     private String valueOf(Object value) {
       return value == null ? null : String.valueOf(value);
     }
+
+    @Override
+    public String toString() { return defn.name(); }
   }
 }
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/file/ImplicitColumnResolver.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/file/ImplicitColumnResolver.java
index 3ec7f35..766e104 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/file/ImplicitColumnResolver.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/file/ImplicitColumnResolver.java
@@ -18,6 +18,7 @@
 package org.apache.drill.exec.physical.impl.scan.v3.file;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -141,7 +142,11 @@ public class ImplicitColumnResolver {
 
     protected ParseResult(List<ImplicitColumnMarker> columns, TupleMetadata schema,
         boolean isMetadataScan) {
-      this.columns = columns;
+      ImplicitColumnMarker reordered[] = new ImplicitColumnMarker[columns.size()];
+      for (ImplicitColumnMarker col : columns) {
+        reordered[col.index()] = col;
+      }
+      this.columns = Arrays.asList(reordered);
       this.schema = schema;
       this.isMetadataScan = isMetadataScan;
     }
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/lifecycle/OutputBatchBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/lifecycle/OutputBatchBuilder.java
index 6c31149..f18d9c0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/lifecycle/OutputBatchBuilder.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/lifecycle/OutputBatchBuilder.java
@@ -28,6 +28,7 @@ import org.apache.drill.exec.record.metadata.ColumnMetadata;
 import org.apache.drill.exec.record.metadata.TupleMetadata;
 import org.apache.drill.exec.vector.ValueVector;
 import org.apache.drill.exec.vector.complex.AbstractMapVector;
+import org.apache.drill.exec.vector.complex.MapVector;
 import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
 
 /**
@@ -234,6 +235,7 @@ public class OutputBatchBuilder {
   private final List<BatchSource> sources;
   private final Object vectorSources[];
   private final VectorContainer outputContainer;
+  private final List<MapVector> mapVectors = new ArrayList<>();
 
   public OutputBatchBuilder(TupleMetadata outputSchema, List<BatchSource> sources,
       BufferAllocator allocator) {
@@ -277,11 +279,18 @@ public class OutputBatchBuilder {
   @SuppressWarnings("unchecked")
   private void physicalProjection() {
     outputContainer.removeAll();
+    mapVectors.clear();
     for (int i = 0; i < outputSchema.size(); i++) {
-      ValueVector outputVector;
       ColumnMetadata outputCol = outputSchema.metadata(i);
+      ValueVector outputVector;
       if (outputCol.isMap()) {
         outputVector = buildTopMap(outputCol, (List<VectorSource>) vectorSources[i]);
+
+        // Map vectors are a nuisance: they carry their own value could which
+        // must be set separately from the underling data vectors.
+        if (outputVector instanceof MapVector) {
+          mapVectors.add((MapVector) outputVector);
+        }
       } else {
         outputVector = getVector((VectorSource) vectorSources[i]);
       }
@@ -309,6 +318,9 @@ public class OutputBatchBuilder {
 
   public void load(int rowCount) {
     outputContainer.setRecordCount(rowCount);
+    for (MapVector v : mapVectors) {
+      v.setMapValueCount(rowCount);
+    }
   }
 
   public VectorContainer outputContainer() { return outputContainer; }
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/schema/AbstractSchemaTracker.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/schema/AbstractSchemaTracker.java
index 7646477..58492fc 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/schema/AbstractSchemaTracker.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/schema/AbstractSchemaTracker.java
@@ -127,6 +127,7 @@ public abstract class AbstractSchemaTracker implements ScanSchemaTracker {
     TupleMetadata implicitCols = new TupleSchema();
     for (ColumnHandle handle : schema.columns()) {
       if (handle.isImplicit()) {
+        handle.setIndex(implicitCols.size());
         implicitCols.addColumn(handle.column());
       }
     }
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/schema/MutableTupleSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/schema/MutableTupleSchema.java
index 89883b2..e17a1d3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/schema/MutableTupleSchema.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/scan/v3/schema/MutableTupleSchema.java
@@ -98,6 +98,7 @@ public class MutableTupleSchema {
 
     public ColumnMetadata column() { return col; }
     public boolean isImplicit() { return marker != null; }
+    public void setIndex(int index) { marker.setIndex(index); }
 
     @Override
     public String toString() {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
index e857364..0c454d7 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
@@ -335,7 +335,7 @@ public class IteratorValidatorBatchIterator implements CloseableRecordBatch {
 
   private void validateBatch() {
     if (validateBatches || VALIDATE_VECTORS) {
-      if (! BatchValidator.validate(incoming)) {
+      if (!BatchValidator.validate(incoming)) {
         throw new IllegalStateException(
             "Batch validation failed. Source operator: " +
             incoming.getClass().getSimpleName());
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java
index 17d7b53..9d1c6a3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java
@@ -396,7 +396,7 @@ public class VectorContainer implements VectorAccessible {
 
   public void setRecordCount(int recordCount) {
     this.recordCount = recordCount;
-    initialized = true;
+    this.initialized = true;
   }
 
   /**