You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by ga...@apache.org on 2019/05/21 02:41:50 UTC

[orc] branch master updated: [ORC-499][C++] Change databuffer resize and refill previous data

This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new a38ac1f  [ORC-499][C++] Change databuffer resize and refill previous data
a38ac1f is described below

commit a38ac1fcd28ee2c5732486e612648bc1bcbe5d76
Author: Zherui <71...@qq.com>
AuthorDate: Mon May 20 22:41:45 2019 -0400

    [ORC-499][C++] Change databuffer resize and refill previous data
    
    This fixes #392
---
 tools/src/CSVFileImport.cc | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tools/src/CSVFileImport.cc b/tools/src/CSVFileImport.cc
index c814879..a98b511 100644
--- a/tools/src/CSVFileImport.cc
+++ b/tools/src/CSVFileImport.cc
@@ -89,8 +89,17 @@ void fillStringValues(const std::vector<std::string>& data,
       hasNull = true;
     } else {
       batch->notNull[i] = 1;
-      if (buffer.size() - offset < col.size()) {
-        buffer.reserve(buffer.size() * 2);
+      char* oldBufferAddress = buffer.data();
+      // Keep doubling the buffer until it has enough remaining space to store the next string.
+      while (buffer.size() - offset < col.size()) {
+        buffer.resize(buffer.size() * 2);
+      }
+      char* newBufferAddress = buffer.data();
+      // If resizing moved the buffer, rebase the pointers already stored in stringBatch->data onto the new address.
+      if (newBufferAddress != oldBufferAddress){
+        for (uint64_t refillIndex = 0; refillIndex < i; ++refillIndex){
+        stringBatch->data[refillIndex] = stringBatch->data[refillIndex] - oldBufferAddress + newBufferAddress;
+        }
       }
       memcpy(buffer.data() + offset,
              col.c_str(),