You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by xn...@apache.org on 2019/03/27 17:36:27 UTC

[orc] branch master updated: ORC-474: Add a clear() method to ColumnVectorBatch (#377)

This is an automated email from the ASF dual-hosted git repository.

xndai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new 132175d  ORC-474: Add a clear() method to ColumnVectorBatch (#377)
132175d is described below

commit 132175d2cbd6014f046a47b889013704879152b9
Author: Cedric Cellier <ri...@happyleptic.org>
AuthorDate: Wed Mar 27 18:36:23 2019 +0100

    ORC-474: Add a clear() method to ColumnVectorBatch (#377)
    
    So that users can merely call it after a batch is written to empty it
    recursively before adding new elements for the next batch.
---
 c++/include/orc/Vector.hh | 16 ++++++++++++++
 c++/src/Vector.cc         | 53 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/c++/include/orc/Vector.hh b/c++/include/orc/Vector.hh
index 9391da3..5bdf27c 100644
--- a/c++/include/orc/Vector.hh
+++ b/c++/include/orc/Vector.hh
@@ -68,6 +68,12 @@ namespace orc {
     virtual void resize(uint64_t capacity);
 
     /**
+     * Removes all elements from the vector, recursively.
+     * Does not alter the current capacity.
+     */
+    virtual void clear();
+
+    /**
      * Heap memory used by the batch.
      */
     virtual uint64_t getMemoryUsage();
@@ -89,6 +95,7 @@ namespace orc {
     DataBuffer<int64_t> data;
     std::string toString() const;
     void resize(uint64_t capacity);
+    void clear();
     uint64_t getMemoryUsage();
   };
 
@@ -97,6 +104,7 @@ namespace orc {
     virtual ~DoubleVectorBatch();
     std::string toString() const;
     void resize(uint64_t capacity);
+    void clear();
     uint64_t getMemoryUsage();
 
     DataBuffer<double> data;
@@ -107,6 +115,7 @@ namespace orc {
     virtual ~StringVectorBatch();
     std::string toString() const;
     void resize(uint64_t capacity);
+    void clear();
     uint64_t getMemoryUsage();
 
     // pointers to the start of each string
@@ -154,6 +163,7 @@ namespace orc {
     virtual ~StructVectorBatch();
     std::string toString() const;
     void resize(uint64_t capacity);
+    void clear();
     uint64_t getMemoryUsage();
     bool hasVariableLength();
 
@@ -165,6 +175,7 @@ namespace orc {
     virtual ~ListVectorBatch();
     std::string toString() const;
     void resize(uint64_t capacity);
+    void clear();
     uint64_t getMemoryUsage();
     bool hasVariableLength();
 
@@ -183,6 +194,7 @@ namespace orc {
     virtual ~MapVectorBatch();
     std::string toString() const;
     void resize(uint64_t capacity);
+    void clear();
     uint64_t getMemoryUsage();
     bool hasVariableLength();
 
@@ -203,6 +215,7 @@ namespace orc {
     virtual ~UnionVectorBatch();
     std::string toString() const;
     void resize(uint64_t capacity);
+    void clear();
     uint64_t getMemoryUsage();
     bool hasVariableLength();
 
@@ -235,6 +248,7 @@ namespace orc {
     virtual ~Decimal64VectorBatch();
     std::string toString() const;
     void resize(uint64_t capacity);
+    void clear();
     uint64_t getMemoryUsage();
 
     // total number of digits
@@ -260,6 +274,7 @@ namespace orc {
     virtual ~Decimal128VectorBatch();
     std::string toString() const;
     void resize(uint64_t capacity);
+    void clear();
     uint64_t getMemoryUsage();
 
     // total number of digits
@@ -291,6 +306,7 @@ namespace orc {
     virtual ~TimestampVectorBatch();
     std::string toString() const;
     void resize(uint64_t capacity);
+    void clear();
     uint64_t getMemoryUsage();
 
     // the number of seconds past 1 Jan 1970 00:00 UTC (aka time_t)
diff --git a/c++/src/Vector.cc b/c++/src/Vector.cc
index ae8ab0f..9043dde 100644
--- a/c++/src/Vector.cc
+++ b/c++/src/Vector.cc
@@ -49,6 +49,10 @@ namespace orc {
     }
   }
 
+  void ColumnVectorBatch::clear() {
+    this->numElements = 0;  // only the element count is reset here
+  }
+
   uint64_t ColumnVectorBatch::getMemoryUsage() {
     return static_cast<uint64_t>(notNull.capacity() * sizeof(char));
   }
@@ -80,6 +84,10 @@ namespace orc {
     }
   }
 
+  void LongVectorBatch::clear() {
+    ColumnVectorBatch::clear();  // base resets numElements; nothing else to do
+  }
+
   uint64_t LongVectorBatch::getMemoryUsage() {
     return ColumnVectorBatch::getMemoryUsage() +
         static_cast<uint64_t>(data.capacity() * sizeof(int64_t));
@@ -108,6 +116,10 @@ namespace orc {
     }
   }
 
+  void DoubleVectorBatch::clear() {
+    ColumnVectorBatch::clear();  // base resets numElements; nothing else to do
+  }
+
   uint64_t DoubleVectorBatch::getMemoryUsage() {
     return ColumnVectorBatch::getMemoryUsage()
           + static_cast<uint64_t>(data.capacity() * sizeof(double));
@@ -161,6 +173,10 @@ namespace orc {
     }
   }
 
+  void StringVectorBatch::clear() {
+    ColumnVectorBatch::clear();  // base resets numElements; nothing else to do
+  }
+
   uint64_t StringVectorBatch::getMemoryUsage() {
     return ColumnVectorBatch::getMemoryUsage()
           + static_cast<uint64_t>(data.capacity() * sizeof(char*)
@@ -194,6 +210,13 @@ namespace orc {
     ColumnVectorBatch::resize(cap);
   }
 
+  void StructVectorBatch::clear() {
+    for (ColumnVectorBatch* field : fields) {  // empty every child first
+      field->clear();
+    }
+    ColumnVectorBatch::clear();  // then reset this batch's own numElements
+  }
+
   uint64_t StructVectorBatch::getMemoryUsage() {
     uint64_t memory = ColumnVectorBatch::getMemoryUsage();
     for (unsigned int i=0; i < fields.size(); i++) {
@@ -235,6 +258,11 @@ namespace orc {
     }
   }
 
+  void ListVectorBatch::clear() {
+    ColumnVectorBatch::clear();  // reset this batch's own numElements
+    elements->clear();           // then empty the nested element batch
+  }
+
   uint64_t ListVectorBatch::getMemoryUsage() {
     return ColumnVectorBatch::getMemoryUsage()
            + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t))
@@ -270,6 +298,12 @@ namespace orc {
     }
   }
 
+  void MapVectorBatch::clear() {
+    keys->clear();               // empty the nested key batch
+    elements->clear();           // empty the nested value batch
+    ColumnVectorBatch::clear();  // then reset this batch's own numElements
+  }
+
   uint64_t MapVectorBatch::getMemoryUsage() {
     return ColumnVectorBatch::getMemoryUsage()
            + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t))
@@ -315,6 +349,13 @@ namespace orc {
     }
   }
 
+  void UnionVectorBatch::clear() {
+    for (ColumnVectorBatch* child : children) {  // empty every child first
+      child->clear();
+    }
+    ColumnVectorBatch::clear();  // then reset this batch's own numElements
+  }
+
   uint64_t UnionVectorBatch::getMemoryUsage() {
     uint64_t memory = ColumnVectorBatch::getMemoryUsage()
                + static_cast<uint64_t>(tags.capacity() * sizeof(unsigned char)
@@ -362,6 +403,10 @@ namespace orc {
     }
   }
 
+  void Decimal64VectorBatch::clear() {
+    ColumnVectorBatch::clear();  // base resets numElements; nothing else to do
+  }
+
   uint64_t Decimal64VectorBatch::getMemoryUsage() {
     return ColumnVectorBatch::getMemoryUsage()
           + static_cast<uint64_t>(
@@ -396,6 +441,10 @@ namespace orc {
     }
   }
 
+  void Decimal128VectorBatch::clear() {
+    ColumnVectorBatch::clear();  // base resets numElements; nothing else to do
+  }
+
   uint64_t Decimal128VectorBatch::getMemoryUsage() {
     return ColumnVectorBatch::getMemoryUsage()
           + static_cast<uint64_t>(values.capacity() * sizeof(Int128)
@@ -455,6 +504,10 @@ namespace orc {
     }
   }
 
+  void TimestampVectorBatch::clear() {
+    ColumnVectorBatch::clear();  // base resets numElements; nothing else to do
+  }
+
   uint64_t TimestampVectorBatch::getMemoryUsage() {
     return ColumnVectorBatch::getMemoryUsage()
           + static_cast<uint64_t>(