You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2021/07/08 15:00:16 UTC

[GitHub] [iceberg] rymurr commented on a change in pull request #2746: Reduce code duplication in Arrow code

rymurr commented on a change in pull request #2746:
URL: https://github.com/apache/iceberg/pull/2746#discussion_r666272918



##########
File path: arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDictionaryEncodedParquetValuesReader.java
##########
@@ -40,413 +40,183 @@ public VectorizedDictionaryEncodedParquetValuesReader(int maxDefLevel, boolean s
     super(maxDefLevel, setValidityVector);
   }
 
-  void readBatchOfDictionaryIds(IntVector intVector, int startOffset, int numValuesToRead,
-                                NullabilityHolder nullabilityHolder) {
-    int left = numValuesToRead;
-    int idx = startOffset;
-    while (left > 0) {
-      if (this.currentCount == 0) {
-        this.readNextGroup();
+  abstract class BaseDictEncodedReader {
+    public void nextBatch(
+        FieldVector vector, int startOffset, int numValuesToRead, Dictionary dict,
+        NullabilityHolder nullabilityHolder, int typeWidth) {
+      int left = numValuesToRead;
+      int idx = startOffset;
+      while (left > 0) {
+        if (currentCount == 0) {
+          readNextGroup();
+        }
+        int numValues = Math.min(left, currentCount);
+        for (int i = 0; i < numValues; i++) {
+          int index = idx * typeWidth;
+          if (typeWidth == -1) {
+            index = idx;
+          }
+          if (Mode.RLE.equals(mode)) {
+            nextVal(vector, dict, index, currentValue, typeWidth);
+          } else if (Mode.PACKED.equals(mode)) {
+            nextVal(vector, dict, index, packedValuesBuffer[packedValuesBufferIdx++], typeWidth);
+          }
+          nullabilityHolder.setNotNull(idx);
+          if (setArrowValidityVector) {
+            BitVectorHelper.setBit(vector.getValidityBuffer(), idx);
+          }
+          idx++;
+        }
+        left -= numValues;
+        currentCount -= numValues;
       }
-      int numValues = Math.min(left, this.currentCount);
-      switch (mode) {
-        case RLE:
-          for (int i = 0; i < numValues; i++) {
-            intVector.set(idx, currentValue);
-            setNotNull(intVector, nullabilityHolder, idx);
-            idx++;
-          }
-          break;
-        case PACKED:
-          for (int i = 0; i < numValues; i++) {
-            intVector.set(idx, packedValuesBuffer[packedValuesBufferIdx++]);
-            setNotNull(intVector, nullabilityHolder, idx);
-            idx++;
-          }
-          break;
-      }
-      left -= numValues;
-      currentCount -= numValues;
     }
+
+    protected abstract void nextVal(FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth);
   }
 
-  void readBatchOfDictionaryEncodedLongs(FieldVector vector, int startOffset, int numValuesToRead, Dictionary dict,
-                                         NullabilityHolder nullabilityHolder, int typeWidth) {
-    int left = numValuesToRead;
-    int idx = startOffset;
-    while (left > 0) {
-      if (this.currentCount == 0) {
-        this.readNextGroup();
-      }
-      int numValues = Math.min(left, this.currentCount);
-      switch (mode) {
-        case RLE:
-          for (int i = 0; i < numValues; i++) {
-            vector.getDataBuffer().setLong(idx * typeWidth, dict.decodeToLong(currentValue));
-            setNotNull(vector, nullabilityHolder, idx);
-            idx++;
-          }
-          break;
-        case PACKED:
-          for (int i = 0; i < numValues; i++) {
-            vector.getDataBuffer()
-                .setLong(idx * typeWidth, dict.decodeToLong(packedValuesBuffer[packedValuesBufferIdx++]));
-            setNotNull(vector, nullabilityHolder, idx);
-            idx++;
-          }
-          break;
-      }
-      left -= numValues;
-      currentCount -= numValues;
+  class DictionaryIdReader extends BaseDictEncodedReader {
+    @Override
+    protected void nextVal(FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
+      ((IntVector) vector).set(idx, currentVal);

Review comment:
       Any reason this is cast to `IntVector` and the others are directly manipulating the data buffer?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org