You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@orc.apache.org by majetideepak <gi...@git.apache.org> on 2018/06/03 15:35:29 UTC

[GitHub] orc pull request #273: ORC-343 Enable C++ writer to support RleV2

Github user majetideepak commented on a diff in the pull request:

    https://github.com/apache/orc/pull/273#discussion_r192593861
  
    --- Diff: c++/src/RLEv2.hh ---
    @@ -25,13 +25,89 @@
     
     #include <vector>
     
    +#define MIN_REPEAT 3
    +#define HIST_LEN 32
     namespace orc {
     
    -class RleDecoderV2 : public RleDecoder {
    +struct FixedBitSizes {
    +    enum FBS {
    +        ONE = 0, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
    +        THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
    +        TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
    +        TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR, SIZE
    +    };
    +};
    +
    +enum EncodingType { SHORT_REPEAT=0, DIRECT=1, PATCHED_BASE=2, DELTA=3 };
    +
    +struct EncodingOption {
    +  EncodingType encoding;
    +  int64_t fixedDelta;
    +  int64_t gapVsPatchListCount;
    +  int64_t zigzagLiteralsCount;
    +  int64_t baseRedLiteralsCount;
    +  int64_t adjDeltasCount;
    +  uint32_t zzBits90p;
    +  uint32_t zzBits100p;
    +  uint32_t brBits95p;
    +  uint32_t brBits100p;
    +  uint32_t bitsDeltaMax;
    +  uint32_t patchWidth;
    +  uint32_t patchGapWidth;
    +  uint32_t patchLength;
    +  int64_t min;
    +  bool isFixedDelta;
    +};
    +
    +class RleEncoderV2 : public RleEncoder {
     public:
    +    RleEncoderV2(std::unique_ptr<BufferedOutputStream> outStream, bool hasSigned, bool alignBitPacking = true);
    --- End diff --
    
    `alignBitPacking` is always true here. Should we add a WriterOption to enable/disable it?
    The Java writer uses the Encoding Strategy to choose this; the C++ writer currently does not have an equivalent option.
    ```
    java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java:144
    if (writer.getEncodingStrategy().equals(OrcFile.EncodingStrategy.SPEED)) {
         alignedBitpacking = true;
    }
    ```


---