You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/08/10 18:25:50 UTC

[orc] branch main updated: ORC-931: Modify RunLengthIntegerWriterV2 code to improve readability (#845)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new e462071  ORC-931: Modify RunLengthIntegerWriterV2 code to improve readability (#845)
e462071 is described below

commit e4620713ee51ea9bfc6d032d8dd9e95cf0962e56
Author: guiyanakaung <gu...@gmail.com>
AuthorDate: Wed Aug 11 02:25:41 2021 +0800

    ORC-931: Modify RunLengthIntegerWriterV2 code to improve readability (#845)
    
    ### What changes were proposed in this pull request?
    
    RunLengthIntegerWriterV2.java
    512-546 line
    ```java
      if (diffBitsLH > 1) {
          for (int i = 0; i < numLiterals; i++) {
            baseRedLiterals[i] = literals[i] - min;
          }
          brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 0.95);
          brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 1.0);
          if ((brBits100p - brBits95p) != 0 && Math.abs(min) < BASE_VALUE_LIMIT) {
            encoding = EncodingType.PATCHED_BASE;
            preparePatchedBlob();
            return;
          } else {
            encoding = EncodingType.DIRECT;
            return;
          }
        } else {
          // if difference in bits between 95th percentile and 100th percentile is
          // 0, then patch length will become 0. Hence we will fallback to direct
          encoding = EncodingType.DIRECT;
          return;
        }
    ```
    All three conditional branch logics have been completed and the return statement is redundant.
    
    691-704 line
    ```java
          if (fixedRunLength < MIN_REPEAT) {
              variableRunLength = fixedRunLength;
              fixedRunLength = 0;
              determineEncoding();
              writeValues();
            } else if (fixedRunLength >= MIN_REPEAT
                && fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
              encoding = EncodingType.SHORT_REPEAT;
              writeValues();
            } else {
              encoding = EncodingType.DELTA;
              isFixedDelta = true;
              writeValues();
            }
    ```
    fixedRunLength >= MIN_REPEAT is redundant, the previous condition already ensures this.
    Extract the writeValues() method to the end. It seems better for conditional judgements to deal only with encoding and state.
    
    772-781 line
    ```java
              if (fixedRunLength >= MIN_REPEAT) {
                if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
                  encoding = EncodingType.SHORT_REPEAT;
                  writeValues();
                } else {
                  encoding = EncodingType.DELTA;
                  isFixedDelta = true;
                  writeValues();
                }
              }
    ```
    Ditto
    
    ### Why are the changes needed?
    
    Modify code to improve readability
    
    ### How was this patch tested?
    
    Pass the CIs
    
    Co-authored-by: zhangyiqun <zh...@huya.com>
---
 .../java/org/apache/orc/impl/RunLengthIntegerWriterV2.java  | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java b/java/core/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java
index 49a42cb..a0e1e78 100644
--- a/java/core/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java
+++ b/java/core/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java
@@ -533,16 +533,13 @@ public class RunLengthIntegerWriterV2 implements IntegerWriter {
       if ((brBits100p - brBits95p) != 0 && Math.abs(min) < BASE_VALUE_LIMIT) {
         encoding = EncodingType.PATCHED_BASE;
         preparePatchedBlob();
-        return;
       } else {
         encoding = EncodingType.DIRECT;
-        return;
       }
     } else {
       // if difference in bits between 95th percentile and 100th percentile is
       // 0, then patch length will become 0. Hence we will fallback to direct
       encoding = EncodingType.DIRECT;
-      return;
     }
   }
 
@@ -692,16 +689,13 @@ public class RunLengthIntegerWriterV2 implements IntegerWriter {
           variableRunLength = fixedRunLength;
           fixedRunLength = 0;
           determineEncoding();
-          writeValues();
-        } else if (fixedRunLength >= MIN_REPEAT
-            && fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
+        } else if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
           encoding = EncodingType.SHORT_REPEAT;
-          writeValues();
         } else {
           encoding = EncodingType.DELTA;
           isFixedDelta = true;
-          writeValues();
         }
+        writeValues();
       }
     }
     output.flush();
@@ -772,12 +766,11 @@ public class RunLengthIntegerWriterV2 implements IntegerWriter {
           if (fixedRunLength >= MIN_REPEAT) {
             if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
               encoding = EncodingType.SHORT_REPEAT;
-              writeValues();
             } else {
               encoding = EncodingType.DELTA;
               isFixedDelta = true;
-              writeValues();
             }
+            writeValues();
           }
 
           // if fixed run length is <MIN_REPEAT and current value is