You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@orc.apache.org by majetideepak <gi...@git.apache.org> on 2018/06/03 15:35:29 UTC
[GitHub] orc pull request #273: ORC-343 Enable C++ writer to support RleV2
GitHub user majetideepak commented on a diff in the pull request:
https://github.com/apache/orc/pull/273#discussion_r192593861
--- Diff: c++/src/RLEv2.hh ---
@@ -25,13 +25,89 @@
#include <vector>
+#define MIN_REPEAT 3
+#define HIST_LEN 32
namespace orc {
-class RleDecoderV2 : public RleDecoder {
+struct FixedBitSizes {
+ enum FBS {
+ ONE = 0, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
+ THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
+ TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
+ TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR, SIZE
+ };
+};
+
+enum EncodingType { SHORT_REPEAT=0, DIRECT=1, PATCHED_BASE=2, DELTA=3 };
+
+struct EncodingOption {
+ EncodingType encoding;
+ int64_t fixedDelta;
+ int64_t gapVsPatchListCount;
+ int64_t zigzagLiteralsCount;
+ int64_t baseRedLiteralsCount;
+ int64_t adjDeltasCount;
+ uint32_t zzBits90p;
+ uint32_t zzBits100p;
+ uint32_t brBits95p;
+ uint32_t brBits100p;
+ uint32_t bitsDeltaMax;
+ uint32_t patchWidth;
+ uint32_t patchGapWidth;
+ uint32_t patchLength;
+ int64_t min;
+ bool isFixedDelta;
+};
+
+class RleEncoderV2 : public RleEncoder {
public:
+ RleEncoderV2(std::unique_ptr<BufferedOutputStream> outStream, bool hasSigned, bool alignBitPacking = true);
--- End diff --
`alignBitPacking` is always true. Should we add a WriterOption to enable/disable it?
Java uses the encoding strategy to choose this value (see the snippet below). The C++ writer currently has no equivalent setting.
```
java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java:144
if (writer.getEncodingStrategy().equals(OrcFile.EncodingStrategy.SPEED)) {
alignedBitpacking = true;
}
```
---