You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2022/09/19 23:13:22 UTC

[GitHub] [pinot] siddharthteotia commented on a diff in pull request #9427: [multistage] support NULL in data blocks.

siddharthteotia commented on code in PR #9427:
URL: https://github.com/apache/pinot/pull/9427#discussion_r974757198


##########
pinot-core/src/main/java/org/apache/pinot/core/common/datablock/DataBlockBuilder.java:
##########
@@ -173,81 +183,133 @@ public static RowDataBlock buildFromRows(List<Object[]> rows, @Nullable RoaringB
             break;
           default:
             throw new IllegalStateException(
-                String.format("Unsupported data type: %s for column: %s", rowBuilder._columnDataType[i],
-                    rowBuilder._dataSchema.getColumnName(i)));
+                String.format("Unsupported data type: %s for column: %s", rowBuilder._columnDataTypes[colId],
+                    rowBuilder._dataSchema.getColumnName(colId)));
         }
       }
       rowBuilder._fixedSizeDataByteArrayOutputStream.write(byteBuffer.array(), 0, byteBuffer.position());
     }
     // Write null bitmaps after writing data.
-    if (colNullBitmaps != null) {
-      for (RoaringBitmap nullBitmap : colNullBitmaps) {
-        rowBuilder.setNullRowIds(nullBitmap);
-      }
+    for (RoaringBitmap nullBitmap : nullBitmaps) {
+      rowBuilder.setNullRowIds(nullBitmap);
     }
     return buildRowBlock(rowBuilder);
   }
 
-  public static ColumnarDataBlock buildFromColumns(List<Object[]> columns, @Nullable RoaringBitmap[] colNullBitmaps,
-      DataSchema dataSchema)
+  public static ColumnarDataBlock buildFromColumns(List<Object[]> columns, DataSchema dataSchema)
       throws IOException {
     DataBlockBuilder columnarBuilder = new DataBlockBuilder(dataSchema, BaseDataBlock.Type.COLUMNAR);
-    for (int i = 0; i < columns.size(); i++) {
-      Object[] column = columns.get(i);
+    RoaringBitmap[] nullBitmaps = new RoaringBitmap[columnarBuilder._numColumns];
+    for (int i = 0; i < columnarBuilder._numColumns; i++) {
+      nullBitmaps[i] = new RoaringBitmap();
+    }
+    for (int colId = 0; colId < columns.size(); colId++) {
+      Object[] column = columns.get(colId);
       columnarBuilder._numRows = column.length;
-      ByteBuffer byteBuffer = ByteBuffer.allocate(columnarBuilder._numRows * columnarBuilder._columnSizeInBytes[i]);
-      switch (columnarBuilder._columnDataType[i]) {
+      ByteBuffer byteBuffer = ByteBuffer.allocate(columnarBuilder._numRows * columnarBuilder._columnSizeInBytes[colId]);
+      Object value;
+      switch (columnarBuilder._columnDataTypes[colId]) {
         // Single-value column
         case INT:
-          for (Object value : column) {
+          for (int rowId = 0; rowId < columnarBuilder._numRows; rowId++) {
+            value = column[rowId];
+            if (value == null) {
+              nullBitmaps[colId].add(rowId);
+              value = NullValueUtils.getDefaultNullValue(columnarBuilder._columnDataTypes[colId]);
+            }
             byteBuffer.putInt(((Number) value).intValue());
           }
           break;
         case LONG:
-          for (Object value : column) {
+          for (int rowId = 0; rowId < columnarBuilder._numRows; rowId++) {
+            value = column[rowId];
+            if (value == null) {
+              nullBitmaps[colId].add(rowId);
+              value = NullValueUtils.getDefaultNullValue(columnarBuilder._columnDataTypes[colId]);
+            }
             byteBuffer.putLong(((Number) value).longValue());
           }
           break;
         case FLOAT:
-          for (Object value : column) {
+          for (int rowId = 0; rowId < columnarBuilder._numRows; rowId++) {
+            value = column[rowId];
+            if (value == null) {
+              nullBitmaps[colId].add(rowId);
+              value = NullValueUtils.getDefaultNullValue(columnarBuilder._columnDataTypes[colId]);

Review Comment:
   Ideally if the null bitmap is working and bit 1 / 0 indicates whether values is non-NULL or NULL, then we should not be encoding the default null value in the data block. 
   
   The point of using null bitmaps to denote NULLs is to not necessarily have to encode placeholder values as long as the reader does isNull(int index) check every for every row read in the column. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org