You are viewing a plain-text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@orc.apache.org by om...@apache.org on 2020/06/19 22:25:57 UTC

[orc] branch branch-1.5 updated: ORC-638: Enlarge MultiValueColumnVector by multiples when child arraySize is not enough.

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.5 by this push:
     new ce770ad  ORC-638: Enlarge MultiValueColumnVector by multiples when child arraySize is not enough.
ce770ad is described below

commit ce770addafec36bbe5ab324fa48216d524fd2669
Author: Lei Sun <au...@gmail.com>
AuthorDate: Thu Jun 4 15:17:16 2020 -0700

    ORC-638: Enlarge MultiValueColumnVector by multiples when child arraySize is not enough.
    
    Fixes #512
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 .../org/apache/orc/mapred/OrcMapredRecordWriter.java | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/java/mapreduce/src/java/org/apache/orc/mapred/OrcMapredRecordWriter.java b/java/mapreduce/src/java/org/apache/orc/mapred/OrcMapredRecordWriter.java
index 59f89f7..94f8be2 100644
--- a/java/mapreduce/src/java/org/apache/orc/mapred/OrcMapredRecordWriter.java
+++ b/java/mapreduce/src/java/org/apache/orc/mapred/OrcMapredRecordWriter.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -55,6 +55,8 @@ import java.util.Map;
 
 public class OrcMapredRecordWriter<V extends Writable>
     implements RecordWriter<NullWritable, V> {
+  // The factor that we grow lists and maps by when they are too small.
+  private static final int GROWTH_FACTOR = 3;
   private final Writer writer;
   private final VectorizedRowBatch batch;
   private final TypeDescription schema;
@@ -146,7 +148,11 @@ public class OrcMapredRecordWriter<V extends Writable>
     vector.offsets[row] = vector.childCount;
     vector.lengths[row] = value.size();
     vector.childCount += vector.lengths[row];
-    vector.child.ensureSize(vector.childCount, vector.offsets[row] != 0);
+    if (vector.child.isNull.length < vector.childCount) {
+      vector.child.ensureSize(vector.childCount * GROWTH_FACTOR,
+          vector.offsets[row] != 0);
+    }
+
     for(int e=0; e < vector.lengths[row]; ++e) {
       setColumn(elemType, vector.child, (int) vector.offsets[row] + e,
           (Writable) value.get(e));
@@ -162,8 +168,14 @@ public class OrcMapredRecordWriter<V extends Writable>
     vector.offsets[row] = vector.childCount;
     vector.lengths[row] = value.size();
     vector.childCount += vector.lengths[row];
-    vector.keys.ensureSize(vector.childCount, vector.offsets[row] != 0);
-    vector.values.ensureSize(vector.childCount, vector.offsets[row] != 0);
+    if (vector.keys.isNull.length < vector.childCount) {
+      vector.keys.ensureSize(vector.childCount * GROWTH_FACTOR,
+          vector.offsets[row] != 0);
+    }
+    if (vector.values.isNull.length < vector.childCount) {
+      vector.values.ensureSize(vector.childCount * GROWTH_FACTOR,
+          vector.offsets[row] != 0);
+    }
     int e = 0;
     for(Map.Entry<?,?> entry: value.entrySet()) {
       setColumn(keyType, vector.keys, (int) vector.offsets[row] + e,