You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by xn...@apache.org on 2019/03/27 17:36:27 UTC
[orc] branch master updated: ORC-474: Add a clear() method to
ColumnVectorBatch (#377)
This is an automated email from the ASF dual-hosted git repository.
xndai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new 132175d ORC-474: Add a clear() method to ColumnVectorBatch (#377)
132175d is described below
commit 132175d2cbd6014f046a47b889013704879152b9
Author: Cedric Cellier <ri...@happyleptic.org>
AuthorDate: Wed Mar 27 18:36:23 2019 +0100
ORC-474: Add a clear() method to ColumnVectorBatch (#377)
So that users can merely call it after a batch is written to empty it
recursively before adding new elements for the next batch.
---
c++/include/orc/Vector.hh | 16 ++++++++++++++
c++/src/Vector.cc | 53 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 69 insertions(+)
diff --git a/c++/include/orc/Vector.hh b/c++/include/orc/Vector.hh
index 9391da3..5bdf27c 100644
--- a/c++/include/orc/Vector.hh
+++ b/c++/include/orc/Vector.hh
@@ -68,6 +68,12 @@ namespace orc {
virtual void resize(uint64_t capacity);
/**
+ * Empties the vector of all its elements, recursively.
+ * Does not alter the current capacity.
+ */
+ virtual void clear();
+
+ /**
* Heap memory used by the batch.
*/
virtual uint64_t getMemoryUsage();
@@ -89,6 +95,7 @@ namespace orc {
DataBuffer<int64_t> data;
std::string toString() const;
void resize(uint64_t capacity);
+ void clear();
uint64_t getMemoryUsage();
};
@@ -97,6 +104,7 @@ namespace orc {
virtual ~DoubleVectorBatch();
std::string toString() const;
void resize(uint64_t capacity);
+ void clear();
uint64_t getMemoryUsage();
DataBuffer<double> data;
@@ -107,6 +115,7 @@ namespace orc {
virtual ~StringVectorBatch();
std::string toString() const;
void resize(uint64_t capacity);
+ void clear();
uint64_t getMemoryUsage();
// pointers to the start of each string
@@ -154,6 +163,7 @@ namespace orc {
virtual ~StructVectorBatch();
std::string toString() const;
void resize(uint64_t capacity);
+ void clear();
uint64_t getMemoryUsage();
bool hasVariableLength();
@@ -165,6 +175,7 @@ namespace orc {
virtual ~ListVectorBatch();
std::string toString() const;
void resize(uint64_t capacity);
+ void clear();
uint64_t getMemoryUsage();
bool hasVariableLength();
@@ -183,6 +194,7 @@ namespace orc {
virtual ~MapVectorBatch();
std::string toString() const;
void resize(uint64_t capacity);
+ void clear();
uint64_t getMemoryUsage();
bool hasVariableLength();
@@ -203,6 +215,7 @@ namespace orc {
virtual ~UnionVectorBatch();
std::string toString() const;
void resize(uint64_t capacity);
+ void clear();
uint64_t getMemoryUsage();
bool hasVariableLength();
@@ -235,6 +248,7 @@ namespace orc {
virtual ~Decimal64VectorBatch();
std::string toString() const;
void resize(uint64_t capacity);
+ void clear();
uint64_t getMemoryUsage();
// total number of digits
@@ -260,6 +274,7 @@ namespace orc {
virtual ~Decimal128VectorBatch();
std::string toString() const;
void resize(uint64_t capacity);
+ void clear();
uint64_t getMemoryUsage();
// total number of digits
@@ -291,6 +306,7 @@ namespace orc {
virtual ~TimestampVectorBatch();
std::string toString() const;
void resize(uint64_t capacity);
+ void clear();
uint64_t getMemoryUsage();
// the number of seconds past 1 Jan 1970 00:00 UTC (aka time_t)
diff --git a/c++/src/Vector.cc b/c++/src/Vector.cc
index ae8ab0f..9043dde 100644
--- a/c++/src/Vector.cc
+++ b/c++/src/Vector.cc
@@ -49,6 +49,10 @@ namespace orc {
}
}
+ void ColumnVectorBatch::clear() { // Base implementation: mark the batch as holding no elements.
+ numElements = 0; // only the element count is reset; no buffer is touched here
+ }
+
uint64_t ColumnVectorBatch::getMemoryUsage() {
return static_cast<uint64_t>(notNull.capacity() * sizeof(char));
}
@@ -80,6 +84,10 @@ namespace orc {
}
}
+ void LongVectorBatch::clear() { // Mark this batch of longs as empty.
+ numElements = 0; // `data` is not modified by this function
+ }
+
uint64_t LongVectorBatch::getMemoryUsage() {
return ColumnVectorBatch::getMemoryUsage() +
static_cast<uint64_t>(data.capacity() * sizeof(int64_t));
@@ -108,6 +116,10 @@ namespace orc {
}
}
+ void DoubleVectorBatch::clear() { // Mark this batch of doubles as empty.
+ numElements = 0; // `data` is not modified by this function
+ }
+
uint64_t DoubleVectorBatch::getMemoryUsage() {
return ColumnVectorBatch::getMemoryUsage()
+ static_cast<uint64_t>(data.capacity() * sizeof(double));
@@ -161,6 +173,10 @@ namespace orc {
}
}
+ void StringVectorBatch::clear() { // Mark this batch of strings as empty.
+ numElements = 0; // only the count is reset; stored string pointers are not modified here
+ }
+
uint64_t StringVectorBatch::getMemoryUsage() {
return ColumnVectorBatch::getMemoryUsage()
+ static_cast<uint64_t>(data.capacity() * sizeof(char*)
@@ -194,6 +210,13 @@ namespace orc {
ColumnVectorBatch::resize(cap);
}
+ void StructVectorBatch::clear() { // Empty this struct batch and every batch in `fields`.
+ for(size_t i=0; i < fields.size(); i++) {
+ fields[i]->clear(); // recurse: each field batch clears itself
+ }
+ numElements = 0; // finally reset this batch's own element count
+ }
+
uint64_t StructVectorBatch::getMemoryUsage() {
uint64_t memory = ColumnVectorBatch::getMemoryUsage();
for (unsigned int i=0; i < fields.size(); i++) {
@@ -235,6 +258,11 @@ namespace orc {
}
}
+ void ListVectorBatch::clear() { // Empty this list batch and its child `elements` batch.
+ numElements = 0; // reset this batch's own element count (`offsets` is not modified here)
+ elements->clear(); // recurse into the child batch
+ }
+
uint64_t ListVectorBatch::getMemoryUsage() {
return ColumnVectorBatch::getMemoryUsage()
+ static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t))
@@ -270,6 +298,12 @@ namespace orc {
}
}
+ void MapVectorBatch::clear() { // Empty this map batch and both of its child batches.
+ keys->clear(); // recurse into the `keys` batch
+ elements->clear(); // recurse into the `elements` batch
+ numElements = 0; // reset this batch's own element count (`offsets` is not modified here)
+ }
+
uint64_t MapVectorBatch::getMemoryUsage() {
return ColumnVectorBatch::getMemoryUsage()
+ static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t))
@@ -315,6 +349,13 @@ namespace orc {
}
}
+ void UnionVectorBatch::clear() { // Empty this union batch and every batch in `children`.
+ for(size_t i=0; i < children.size(); i++) {
+ children[i]->clear(); // recurse: each child batch clears itself
+ }
+ numElements = 0; // finally reset this batch's own element count
+ }
+
uint64_t UnionVectorBatch::getMemoryUsage() {
uint64_t memory = ColumnVectorBatch::getMemoryUsage()
+ static_cast<uint64_t>(tags.capacity() * sizeof(unsigned char)
@@ -362,6 +403,10 @@ namespace orc {
}
}
+ void Decimal64VectorBatch::clear() { // Mark this decimal batch as empty.
+ numElements = 0; // only the element count is reset; nothing else is modified here
+ }
+
uint64_t Decimal64VectorBatch::getMemoryUsage() {
return ColumnVectorBatch::getMemoryUsage()
+ static_cast<uint64_t>(
@@ -396,6 +441,10 @@ namespace orc {
}
}
+ void Decimal128VectorBatch::clear() { // Mark this decimal batch as empty.
+ numElements = 0; // only the element count is reset; `values` is not modified here
+ }
+
uint64_t Decimal128VectorBatch::getMemoryUsage() {
return ColumnVectorBatch::getMemoryUsage()
+ static_cast<uint64_t>(values.capacity() * sizeof(Int128)
@@ -455,6 +504,10 @@ namespace orc {
}
}
+ void TimestampVectorBatch::clear() { // Mark this timestamp batch as empty.
+ numElements = 0; // only the element count is reset; nothing else is modified here
+ }
+
uint64_t TimestampVectorBatch::getMemoryUsage() {
return ColumnVectorBatch::getMemoryUsage()
+ static_cast<uint64_t>(