You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by ga...@apache.org on 2022/11/25 02:20:30 UTC

[orc] branch main updated: ORC-1315: [C++] Fix byte to integer conversion failure on platforms with unsigned char type

This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new acb8d6cad ORC-1315: [C++] Fix byte to integer conversion failure on platforms with unsigned char type
acb8d6cad is described below

commit acb8d6cad138a8fc629053d487b55017d148ff9d
Author: Hao Zou <34...@users.noreply.github.com>
AuthorDate: Fri Nov 25 10:20:24 2022 +0800

    ORC-1315: [C++] Fix byte to integer conversion failure on platforms with unsigned char type
    
    This closes #1323
---
 c++/src/ByteRLE.cc      |  6 +++---
 c++/src/ColumnReader.cc |  7 ++++---
 c++/src/RLEv1.cc        |  2 +-
 c++/test/TestByteRle.cc | 26 +++++++++++++-------------
 4 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/c++/src/ByteRLE.cc b/c++/src/ByteRLE.cc
index e268a5645..aae68d64f 100644
--- a/c++/src/ByteRLE.cc
+++ b/c++/src/ByteRLE.cc
@@ -354,7 +354,7 @@ namespace orc {
     if (bufferStart == bufferEnd) {
       nextBuffer();
     }
-    return *(bufferStart++);
+    return static_cast<signed char>(*(bufferStart++));
   }
 
   void ByteRleDecoderImpl::readHeader() {
@@ -365,7 +365,7 @@ namespace orc {
     } else {
       remainingValues = static_cast<size_t>(ch) + MINIMUM_REPEAT;
       repeating = true;
-      value = readByte();
+      value = static_cast<char>(readByte());
     }
   }
 
@@ -458,7 +458,7 @@ namespace orc {
         if (notNull) {
           for (uint64_t i = 0; i < count; ++i) {
             if (notNull[position + i]) {
-              data[position + i] = readByte();
+              data[position + i] = static_cast<char>(readByte());
               consumed += 1;
             }
           }
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index eea978b0e..eb23cd4a2 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -116,7 +116,7 @@ namespace orc {
   }
 
   /**
-   * Expand an array of bytes in place to the corresponding array of longs/.
+   * Expand an array of bytes in place to the corresponding array of integer.
    * Has to work backwards so that they data isn't clobbered during the
    * expansion.
    * @param buffer the array of chars and array of longs that need to be
@@ -128,8 +128,9 @@ namespace orc {
     if (sizeof(T) == sizeof(char)) {
       return;
     }
-    for (size_t i = numValues - 1; i < numValues; --i) {
-      buffer[i] = reinterpret_cast<char*>(buffer)[i];
+    for (uint64_t i = 0UL; i < numValues; ++i) {
+      buffer[numValues - 1 - i] =
+          static_cast<T>(reinterpret_cast<char*>(buffer)[numValues - 1 - i]);
     }
   }
 
diff --git a/c++/src/RLEv1.cc b/c++/src/RLEv1.cc
index f8431566c..b221e8b8a 100644
--- a/c++/src/RLEv1.cc
+++ b/c++/src/RLEv1.cc
@@ -146,7 +146,7 @@ namespace orc {
       bufferStart = static_cast<const char*>(bufferPointer);
       bufferEnd = bufferStart + bufferLength;
     }
-    return *(bufferStart++);
+    return static_cast<signed char>(*(bufferStart++));
   }
 
   uint64_t RleDecoderV1::readLong() {
diff --git a/c++/test/TestByteRle.cc b/c++/test/TestByteRle.cc
index e11988919..841a15779 100644
--- a/c++/test/TestByteRle.cc
+++ b/c++/test/TestByteRle.cc
@@ -50,8 +50,8 @@ namespace orc {
     char buffer[258];
     char notNull[266];
     char result[266];
-    buffer[0] = -128;
-    buffer[129] = -128;
+    buffer[0] = static_cast<char>(-128);
+    buffer[129] = static_cast<char>(-128);
     for (int i = 0; i < 128; ++i) {
       buffer[1 + i] = static_cast<char>(i);
       buffer[130 + i] = static_cast<char>(128 + i);
@@ -180,7 +180,7 @@ namespace orc {
         createByteRleDecoder(std::unique_ptr<orc::SeekableInputStream>(
                                  new SeekableArrayInputStream(buffer, ARRAY_SIZE(buffer), 3)),
                              getDefaultReaderMetrics());
-    std::vector<char> data(16, -1);
+    std::vector<char> data(16, static_cast<char>(-1));
     std::vector<char> notNull(data.size());
     for (size_t i = 0; i < data.size(); ++i) {
       notNull[i] = (i + 1) % 2;
@@ -192,14 +192,14 @@ namespace orc {
           EXPECT_EQ((i * data.size() + j) / 2, data[j])
               << "Output wrong at " << (i * data.size() + j);
         } else {
-          EXPECT_EQ(-1, data[j]) << "Output wrong at " << (i * data.size() + j);
+          EXPECT_EQ(static_cast<char>(-1), data[j]) << "Output wrong at " << (i * data.size() + j);
         }
       }
     }
     for (size_t i = 0; i < 8; ++i) {
       rle->next(data.data(), data.size(), notNull.data());
       for (size_t j = 0; j < data.size(); ++j) {
-        EXPECT_EQ(j % 2 == 0 ? -36 : -1, data[j])
+        EXPECT_EQ(j % 2 == 0 ? static_cast<char>(-36) : static_cast<char>(-1), data[j])
             << "Output wrong at " << (i * data.size() + j + 32);
       }
     }
@@ -212,26 +212,26 @@ namespace orc {
         createByteRleDecoder(std::unique_ptr<orc::SeekableInputStream>(
                                  new SeekableArrayInputStream(buffer, ARRAY_SIZE(buffer))),
                              getDefaultReaderMetrics());
-    std::vector<char> data(16, -1);
+    std::vector<char> data(16, static_cast<char>(-1));
     std::vector<char> allNull(data.size(), 0);
     std::vector<char> noNull(data.size(), 1);
     rle->next(data.data(), data.size(), allNull.data());
     for (size_t i = 0; i < data.size(); ++i) {
-      EXPECT_EQ(-1, data[i]) << "Output wrong at " << i;
+      EXPECT_EQ(static_cast<char>(-1), data[i]) << "Output wrong at " << i;
     }
     rle->next(data.data(), data.size(), noNull.data());
     for (size_t i = 0; i < data.size(); ++i) {
       EXPECT_EQ(i, data[i]) << "Output wrong at " << i;
-      data[i] = -1;
+      data[i] = static_cast<char>(-1);
     }
     rle->next(data.data(), data.size(), allNull.data());
     for (size_t i = 0; i < data.size(); ++i) {
-      EXPECT_EQ(-1, data[i]) << "Output wrong at " << i;
+      EXPECT_EQ(static_cast<char>(-1), data[i]) << "Output wrong at " << i;
     }
     for (size_t i = 0; i < 4; ++i) {
       rle->next(data.data(), data.size(), noNull.data());
       for (size_t j = 0; j < data.size(); ++j) {
-        EXPECT_EQ(-36, data[j]) << "Output wrong at " << i;
+        EXPECT_EQ(static_cast<char>(-36), data[j]) << "Output wrong at " << i;
       }
     }
     rle->next(data.data(), data.size(), allNull.data());
@@ -989,7 +989,7 @@ namespace orc {
     someNull[1] = 1;
     std::vector<char> allNull(data.size(), 0);
     for (size_t i = 0; i < 16384; i += 5) {
-      data.assign(data.size(), -1);
+      data.assign(data.size(), static_cast<char>(-1));
       rle->next(data.data(), data.size(), someNull.data());
       EXPECT_EQ(0, data[0]) << "Output wrong at " << i;
       EXPECT_EQ(0, data[2]) << "Output wrong at " << i;
@@ -998,7 +998,7 @@ namespace orc {
         rle->skip(4);
       }
       rle->skip(0);
-      data.assign(data.size(), -1);
+      data.assign(data.size(), static_cast<char>(-1));
       rle->next(data.data(), data.size(), allNull.data());
       for (size_t j = 0; j < data.size(); ++j) {
         EXPECT_EQ(0, data[j]) << "Output wrong at " << i << ", " << j;
@@ -1230,7 +1230,7 @@ namespace orc {
       rle->seek(location);
       rle->next(data.data(), 1, noNull.data());
       EXPECT_EQ(i < 8192 ? i & 1 : (i / 3) & 1, data[i]) << "Output wrong at " << i;
-      data[0] = -1;
+      data[0] = static_cast<char>(-1);
       rle->next(data.data(), 1, allNull.data());
       EXPECT_EQ(0, data[0]) << "Output wrong at " << i;
     } while (i != 0);