You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by kh...@apache.org on 2022/12/02 11:42:34 UTC

[pinot] branch master updated: Add memory optimized dimension table (#9802)

This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 42354206b7 Add memory optimized dimension table (#9802)
42354206b7 is described below

commit 42354206b7cc79aa02ede20e73f6b0f36ee1d9d9
Author: Kartik Khare <kh...@gmail.com>
AuthorDate: Fri Dec 2 17:12:25 2022 +0530

    Add memory optimized dimension table (#9802)
    
    * Add mem optimized dim table
    
    * rename config and remove template variable
    
    * Add dimensionTable config to table config builder and serializers
    
    * fix linting
    
    * WIP: close segments after memtable is done
    
    * Do not close segments when data is not preLoaded
    
    * Fix segment close logic
    
    * closing segment data manager inside dimension table
    
    * Fix linting
    
    Co-authored-by: Kartik Khare <kh...@Kartiks-MacBook-Pro.local>
---
 .../common/utils/config/TableConfigUtils.java      | 13 ++-
 .../core/data/manager/offline/DimensionTable.java  | 36 ++-------
 .../manager/offline/DimensionTableDataManager.java | 85 ++++++++++++++++++--
 ...ionTable.java => FastLookupDimensionTable.java} | 30 ++++---
 .../data/manager/offline/LookupRecordLocation.java | 46 +++++++++++
 .../offline/MemoryOptimizedDimensionTable.java     | 93 ++++++++++++++++++++++
 .../offline/DimensionTableDataManagerTest.java     | 57 +++++++++++++
 .../spi/config/table/DimensionTableConfig.java     | 37 +++++++++
 .../apache/pinot/spi/config/table/TableConfig.java | 15 ++++
 .../spi/utils/builder/TableConfigBuilder.java      | 11 ++-
 10 files changed, 371 insertions(+), 52 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/config/TableConfigUtils.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/config/TableConfigUtils.java
index 9735a0f632..8abb0ea964 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/config/TableConfigUtils.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/config/TableConfigUtils.java
@@ -29,6 +29,7 @@ import java.util.Map;
 import org.apache.commons.collections.MapUtils;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
 import org.apache.pinot.spi.config.table.DedupConfig;
+import org.apache.pinot.spi.config.table.DimensionTableConfig;
 import org.apache.pinot.spi.config.table.FieldConfig;
 import org.apache.pinot.spi.config.table.IndexingConfig;
 import org.apache.pinot.spi.config.table.QueryConfig;
@@ -139,6 +140,12 @@ public class TableConfigUtils {
       dedupConfig = JsonUtils.stringToObject(dedupConfigString, DedupConfig.class);
     }
 
+    DimensionTableConfig dimensionTableConfig = null;
+    String dimensionTableConfigString = simpleFields.get(TableConfig.DIMENSION_TABLE_CONFIG_KEY);
+    if (dimensionTableConfigString != null) {
+      dimensionTableConfig = JsonUtils.stringToObject(dimensionTableConfigString, DimensionTableConfig.class);
+    }
+
     IngestionConfig ingestionConfig = null;
     String ingestionConfigString = simpleFields.get(TableConfig.INGESTION_CONFIG_KEY);
     if (ingestionConfigString != null) {
@@ -175,7 +182,7 @@ public class TableConfigUtils {
 
     return new TableConfig(tableName, tableType, validationConfig, tenantConfig, indexingConfig, customConfig,
         quotaConfig, taskConfig, routingConfig, queryConfig, instanceAssignmentConfigMap,
-        fieldConfigList, upsertConfig, dedupConfig, ingestionConfig, tierConfigList, isDimTable,
+        fieldConfigList, upsertConfig, dedupConfig, dimensionTableConfig, ingestionConfig, tierConfigList, isDimTable,
         tunerConfigList, instancePartitionsMap, segmentAssignmentConfigMap);
   }
 
@@ -227,6 +234,10 @@ public class TableConfigUtils {
     if (dedupConfig != null) {
       simpleFields.put(TableConfig.DEDUP_CONFIG_KEY, JsonUtils.objectToString(dedupConfig));
     }
+    DimensionTableConfig dimensionTableConfig = tableConfig.getDimensionTableConfig();
+    if (dimensionTableConfig != null) {
+      simpleFields.put(TableConfig.DIMENSION_TABLE_CONFIG_KEY, JsonUtils.objectToString(dimensionTableConfig));
+    }
     IngestionConfig ingestionConfig = tableConfig.getIngestionConfig();
     if (ingestionConfig != null) {
       simpleFields.put(TableConfig.INGESTION_CONFIG_KEY, JsonUtils.objectToString(ingestionConfig));
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java
index 6485f4456c..b98d1d51e0 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java
@@ -18,44 +18,20 @@
  */
 package org.apache.pinot.core.data.manager.offline;
 
-import java.util.HashMap;
+import java.io.Closeable;
 import java.util.List;
-import java.util.Map;
 import org.apache.pinot.spi.data.FieldSpec;
-import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.GenericRow;
 import org.apache.pinot.spi.data.readers.PrimaryKey;
 
 
-class DimensionTable {
+public interface DimensionTable extends Closeable {
 
-  private final Map<PrimaryKey, GenericRow> _lookupTable;
-  private final Schema _tableSchema;
-  private final List<String> _primaryKeyColumns;
+  List<String> getPrimaryKeyColumns();
 
-  DimensionTable(Schema tableSchema, List<String> primaryKeyColumns) {
-    this(tableSchema, primaryKeyColumns, new HashMap<>());
-  }
+  GenericRow get(PrimaryKey pk);
 
-  DimensionTable(Schema tableSchema, List<String> primaryKeyColumns, Map<PrimaryKey, GenericRow> lookupTable) {
-    _lookupTable = lookupTable;
-    _tableSchema = tableSchema;
-    _primaryKeyColumns = primaryKeyColumns;
-  }
+  boolean isEmpty();
 
-  List<String> getPrimaryKeyColumns() {
-    return _primaryKeyColumns;
-  }
-
-  GenericRow get(PrimaryKey pk) {
-    return _lookupTable.get(pk);
-  }
-
-  boolean isEmpty() {
-    return _lookupTable.isEmpty();
-  }
-
-  FieldSpec getFieldSpecFor(String columnName) {
-    return _tableSchema.getFieldSpecFor(columnName);
-  }
+  FieldSpec getFieldSpecFor(String columnName);
 }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java
index 1e9c8fbbcd..7031c135dc 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java
@@ -20,6 +20,7 @@ package org.apache.pinot.core.data.manager.offline;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -32,6 +33,8 @@ import org.apache.pinot.segment.local.data.manager.SegmentDataManager;
 import org.apache.pinot.segment.local.segment.readers.PinotSegmentRecordReader;
 import org.apache.pinot.segment.spi.ImmutableSegment;
 import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.spi.config.table.DimensionTableConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.GenericRow;
@@ -78,16 +81,32 @@ public class DimensionTableDataManager extends OfflineTableDataManager {
       AtomicReferenceFieldUpdater.newUpdater(DimensionTableDataManager.class, DimensionTable.class, "_dimensionTable");
 
   private volatile DimensionTable _dimensionTable;
+  private boolean _disablePreload = false;
 
   @Override
   protected void doInit() {
     super.doInit();
     Schema schema = ZKMetadataProvider.getTableSchema(_propertyStore, _tableNameWithType);
     Preconditions.checkState(schema != null, "Failed to find schema for dimension table: %s", _tableNameWithType);
+
     List<String> primaryKeyColumns = schema.getPrimaryKeyColumns();
     Preconditions.checkState(CollectionUtils.isNotEmpty(primaryKeyColumns),
         "Primary key columns must be configured for dimension table: %s", _tableNameWithType);
-    _dimensionTable = new DimensionTable(schema, primaryKeyColumns);
+
+    TableConfig tableConfig = ZKMetadataProvider.getTableConfig(_propertyStore, _tableNameWithType);
+    if (tableConfig != null) {
+      DimensionTableConfig dimensionTableConfig = tableConfig.getDimensionTableConfig();
+      if (dimensionTableConfig != null) {
+        _disablePreload = dimensionTableConfig.isDisablePreload();
+      }
+    }
+
+    if (_disablePreload) {
+      _dimensionTable = new MemoryOptimizedDimensionTable(schema, primaryKeyColumns, Collections.emptyMap(),
+          Collections.emptyList(), this);
+    } else {
+      _dimensionTable = new FastLookupDimensionTable(schema, primaryKeyColumns, new HashMap<>());
+    }
   }
 
   @Override
@@ -117,6 +136,19 @@ public class DimensionTableDataManager extends OfflineTableDataManager {
     }
   }
 
+  @Override
+  protected void doShutdown() { // Closes the dimension table currently referenced by _dimensionTable.
+    closeDimensionTable(_dimensionTable); // NOTE(review): super.doShutdown() is not called here - confirm intended.
+  }
+
+  private void closeDimensionTable(DimensionTable dimensionTable) { // Best-effort close of the given table.
+    try {
+      dimensionTable.close();
+    } catch (Exception e) { // Any failure is logged as a warning and not rethrown.
+      _logger.warn("Cannot close dimension table: {}", _tableNameWithType, e);
+    }
+  }
+
   /**
    * `loadLookupTable()` reads contents of the DimensionTable into _lookupTable HashMap for fast lookup.
    */
@@ -125,21 +157,28 @@ public class DimensionTableDataManager extends OfflineTableDataManager {
     DimensionTable replacement;
     do {
       snapshot = _dimensionTable;
-      replacement = createDimensionTable();
+      if (_disablePreload) {
+        replacement = createMemOptimisedDimensionTable();
+      } else {
+        replacement = createFastLookupDimensionTable();
+      }
     } while (!UPDATER.compareAndSet(this, snapshot, replacement));
+
+    closeDimensionTable(snapshot);
   }
 
-  private DimensionTable createDimensionTable() {
+  private DimensionTable createFastLookupDimensionTable() {
     Schema schema = ZKMetadataProvider.getTableSchema(_propertyStore, _tableNameWithType);
     Preconditions.checkState(schema != null, "Failed to find schema for dimension table: %s", _tableNameWithType);
+
     List<String> primaryKeyColumns = schema.getPrimaryKeyColumns();
     Preconditions.checkState(CollectionUtils.isNotEmpty(primaryKeyColumns),
         "Primary key columns must be configured for dimension table: %s", _tableNameWithType);
 
     Map<PrimaryKey, GenericRow> lookupTable = new HashMap<>();
-    List<SegmentDataManager> segmentManagers = acquireAllSegments();
+    List<SegmentDataManager> segmentDataManagers = acquireAllSegments();
     try {
-      for (SegmentDataManager segmentManager : segmentManagers) {
+      for (SegmentDataManager segmentManager : segmentDataManagers) {
         IndexSegment indexSegment = segmentManager.getSegment();
         int numTotalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
         if (numTotalDocs > 0) {
@@ -156,14 +195,46 @@ public class DimensionTableDataManager extends OfflineTableDataManager {
           }
         }
       }
-      return new DimensionTable(schema, primaryKeyColumns, lookupTable);
+      return new FastLookupDimensionTable(schema, primaryKeyColumns, lookupTable);
     } finally {
-      for (SegmentDataManager segmentManager : segmentManagers) {
+      for (SegmentDataManager segmentManager : segmentDataManagers) {
         releaseSegment(segmentManager);
       }
     }
   }
 
+  private DimensionTable createMemOptimisedDimensionTable() { // Builds a PK to (record reader, docId) index.
+    Schema schema = ZKMetadataProvider.getTableSchema(_propertyStore, _tableNameWithType);
+    Preconditions.checkState(schema != null, "Failed to find schema for dimension table: %s", _tableNameWithType);
+
+    List<String> primaryKeyColumns = schema.getPrimaryKeyColumns();
+    Preconditions.checkState(CollectionUtils.isNotEmpty(primaryKeyColumns),
+        "Primary key columns must be configured for dimension table: %s", _tableNameWithType);
+
+    Map<PrimaryKey, LookupRecordLocation> lookupTable = new HashMap<>();
+    List<SegmentDataManager> segmentDataManagers = acquireAllSegments(); // Not released here; passed to the table.
+    for (SegmentDataManager segmentManager : segmentDataManagers) {
+      IndexSegment indexSegment = segmentManager.getSegment();
+      int numTotalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
+      if (numTotalDocs > 0) {
+        try {
+          PinotSegmentRecordReader recordReader = new PinotSegmentRecordReader(); // One reader per segment.
+          recordReader.init(indexSegment);
+          for (int i = 0; i < numTotalDocs; i++) {
+            GenericRow row = new GenericRow();
+            recordReader.getRecord(i, row); // Row is read only to derive the key; it is not stored.
+            lookupTable.put(row.getPrimaryKey(primaryKeyColumns), new LookupRecordLocation(recordReader, i));
+          }
+        } catch (Exception e) { // NOTE(review): segments acquired above are not released on this path.
+          throw new RuntimeException(
+              "Caught exception while reading records from segment: " + indexSegment.getSegmentName(), e);
+        }
+      }
+    }
+    return new MemoryOptimizedDimensionTable(schema, primaryKeyColumns, lookupTable,
+        segmentDataManagers, this);
+  }
+
   public boolean isPopulated() {
     return !_dimensionTable.isEmpty();
   }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/FastLookupDimensionTable.java
similarity index 72%
copy from pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java
copy to pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/FastLookupDimensionTable.java
index 6485f4456c..ae6776aba3 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTable.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/FastLookupDimensionTable.java
@@ -18,7 +18,7 @@
  */
 package org.apache.pinot.core.data.manager.offline;
 
-import java.util.HashMap;
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import org.apache.pinot.spi.data.FieldSpec;
@@ -27,35 +27,41 @@ import org.apache.pinot.spi.data.readers.GenericRow;
 import org.apache.pinot.spi.data.readers.PrimaryKey;
 
 
-class DimensionTable {
+class FastLookupDimensionTable implements DimensionTable {
 
-  private final Map<PrimaryKey, GenericRow> _lookupTable;
+  private Map<PrimaryKey, GenericRow> _lookupTable;
   private final Schema _tableSchema;
   private final List<String> _primaryKeyColumns;
 
-  DimensionTable(Schema tableSchema, List<String> primaryKeyColumns) {
-    this(tableSchema, primaryKeyColumns, new HashMap<>());
-  }
-
-  DimensionTable(Schema tableSchema, List<String> primaryKeyColumns, Map<PrimaryKey, GenericRow> lookupTable) {
+  FastLookupDimensionTable(Schema tableSchema, List<String> primaryKeyColumns,
+      Map<PrimaryKey, GenericRow> lookupTable) {
     _lookupTable = lookupTable;
     _tableSchema = tableSchema;
     _primaryKeyColumns = primaryKeyColumns;
   }
 
-  List<String> getPrimaryKeyColumns() {
+  @Override
+  public List<String> getPrimaryKeyColumns() {
     return _primaryKeyColumns;
   }
 
-  GenericRow get(PrimaryKey pk) {
+  @Override
+  public GenericRow get(PrimaryKey pk) {
     return _lookupTable.get(pk);
   }
 
-  boolean isEmpty() {
+  @Override
+  public boolean isEmpty() {
     return _lookupTable.isEmpty();
   }
 
-  FieldSpec getFieldSpecFor(String columnName) {
+  @Override
+  public FieldSpec getFieldSpecFor(String columnName) {
     return _tableSchema.getFieldSpecFor(columnName);
   }
+
+  @Override
+  public void close() // No-op: the method body is empty.
+      throws IOException {
+  }
 }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/LookupRecordLocation.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/LookupRecordLocation.java
new file mode 100644
index 0000000000..483760b6a3
--- /dev/null
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/LookupRecordLocation.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.data.manager.offline;
+
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentRecordReader;
+import org.apache.pinot.spi.data.readers.GenericRow;
+
+
+public class LookupRecordLocation { // Pairs a segment record reader with a doc id; both fields are final.
+  private final PinotSegmentRecordReader _pinotSegmentRecordReader;
+  private final int _docId;
+
+  public LookupRecordLocation(PinotSegmentRecordReader pinotSegmentRecordReader, int docId) {
+    _pinotSegmentRecordReader = pinotSegmentRecordReader;
+    _docId = docId;
+  }
+
+  public PinotSegmentRecordReader getPinotSegmentRecordReader() {
+    return _pinotSegmentRecordReader;
+  }
+
+  public int getDocId() {
+    return _docId;
+  }
+
+  public GenericRow getRecord(GenericRow reuse) { // Reads the row at _docId into 'reuse' and returns that instance.
+    _pinotSegmentRecordReader.getRecord(_docId, reuse);
+    return reuse;
+  }
+}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/MemoryOptimizedDimensionTable.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/MemoryOptimizedDimensionTable.java
new file mode 100644
index 0000000000..8f74015f01
--- /dev/null
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/MemoryOptimizedDimensionTable.java
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.data.manager.offline;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.segment.local.data.manager.SegmentDataManager;
+import org.apache.pinot.segment.local.data.manager.TableDataManager;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.data.readers.PrimaryKey;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+class MemoryOptimizedDimensionTable implements DimensionTable { // Stores PK to record location, not the rows.
+  private static final Logger LOGGER = LoggerFactory.getLogger(MemoryOptimizedDimensionTable.class);
+
+  private final Map<PrimaryKey, LookupRecordLocation> _lookupTable;
+  private final Schema _tableSchema;
+  private final List<String> _primaryKeyColumns;
+  private final GenericRow _reuse = new GenericRow(); // Single row instance handed to every get() call.
+  private final List<SegmentDataManager> _segmentDataManagers;
+  private final TableDataManager _tableDataManager;
+
+  MemoryOptimizedDimensionTable(Schema tableSchema, List<String> primaryKeyColumns,
+      Map<PrimaryKey, LookupRecordLocation> lookupTable, List<SegmentDataManager> segmentDataManagers,
+      TableDataManager tableDataManager) {
+    _tableSchema = tableSchema;
+    _primaryKeyColumns = primaryKeyColumns;
+    _lookupTable = lookupTable;
+    _segmentDataManagers = segmentDataManagers;
+    _tableDataManager = tableDataManager;
+  }
+
+  @Override
+  public List<String> getPrimaryKeyColumns() {
+    return _primaryKeyColumns;
+  }
+
+  @Override
+  public GenericRow get(PrimaryKey pk) { // Returns null for an unknown key, else the row read from its location.
+    LookupRecordLocation lookupRecordLocation = _lookupTable.get(pk);
+    if (lookupRecordLocation == null) {
+      return null;
+    }
+    return lookupRecordLocation.getRecord(_reuse); // NOTE(review): shared row; confirm callers are not concurrent.
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return _lookupTable.isEmpty();
+  }
+
+  @Override
+  public FieldSpec getFieldSpecFor(String columnName) {
+    return _tableSchema.getFieldSpecFor(columnName);
+  }
+
+  @Override
+  public void close() // Closes the record readers, then releases the segment managers given at construction.
+      throws IOException {
+    for (LookupRecordLocation lookupRecordLocation : _lookupTable.values()) { // Runs once per key, not per reader.
+      try {
+        lookupRecordLocation.getPinotSegmentRecordReader().close();
+      } catch (Exception e) { // A failed close is logged; the loop continues with the next entry.
+        LOGGER.warn("Cannot close segment record reader", e);
+      }
+    }
+
+    for (SegmentDataManager segmentDataManager : _segmentDataManagers) {
+      _tableDataManager.releaseSegment(segmentDataManager);
+    }
+  }
+}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManagerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManagerTest.java
index ae166583eb..5edf315dad 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManagerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManagerTest.java
@@ -30,6 +30,7 @@ import org.apache.helix.zookeeper.datamodel.ZNRecord;
 import org.apache.pinot.common.metadata.segment.SegmentZKMetadata;
 import org.apache.pinot.common.metrics.ServerMetrics;
 import org.apache.pinot.common.utils.SchemaUtils;
+import org.apache.pinot.common.utils.config.TableConfigUtils;
 import org.apache.pinot.segment.local.data.manager.SegmentDataManager;
 import org.apache.pinot.segment.local.data.manager.TableDataManagerConfig;
 import org.apache.pinot.segment.local.data.manager.TableDataManagerParams;
@@ -41,12 +42,16 @@ import org.apache.pinot.segment.spi.SegmentMetadata;
 import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
 import org.apache.pinot.segment.spi.creator.SegmentIndexCreationDriver;
 import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
+import org.apache.pinot.spi.config.table.DimensionTableConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.GenericRow;
 import org.apache.pinot.spi.data.readers.PrimaryKey;
 import org.apache.pinot.spi.metrics.PinotMetricUtils;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@@ -103,6 +108,13 @@ public class DimensionTableDataManagerTest {
         .setPrimaryKeyColumns(Collections.singletonList("teamID")).build();
   }
 
+  private TableConfig getTableConfig(boolean disablePreload) { // OFFLINE table config carrying the given flag.
+    DimensionTableConfig dimensionTableConfig = new DimensionTableConfig(disablePreload);
+    return new TableConfigBuilder(TableType.OFFLINE)
+        .setTableName("dimBaseballTeams").setSchemaName("dimBaseballTeams")
+        .setDimensionTableConfig(dimensionTableConfig).build();
+  }
+
   private Schema getSchemaWithExtraColumn() {
     return new Schema.SchemaBuilder().setSchemaName("dimBaseballTeams")
         .addSingleValueDimension("teamID", DataType.STRING).addSingleValueDimension("teamName", DataType.STRING)
@@ -239,4 +251,49 @@ public class DimensionTableDataManagerTest {
     assertEquals(resp.getValue("teamName"), "San Francisco Giants");
     assertEquals(resp.getValue("teamCity"), "null");
   }
+
+  @Test
+  public void testLookupWithoutPreLoad() // Lookup, field spec, PK columns and segment removal, disablePreload=true.
+      throws Exception {
+    HelixManager helixManager = mock(HelixManager.class);
+    ZkHelixPropertyStore<ZNRecord> propertyStore = mock(ZkHelixPropertyStore.class);
+    when(propertyStore.get("/SCHEMAS/dimBaseballTeams", null, AccessOption.PERSISTENT)).thenReturn(
+        SchemaUtils.toZNRecord(getSchema()));
+    when(propertyStore.get("/CONFIGS/TABLE/dimBaseballTeams", null, AccessOption.PERSISTENT)).thenReturn(
+        TableConfigUtils.toZNRecord(getTableConfig(true))); // disablePreload = true
+    when(helixManager.getHelixPropertyStore()).thenReturn(propertyStore);
+    DimensionTableDataManager tableDataManager = makeTableDataManager(helixManager);
+
+    // try fetching data BEFORE loading segment
+    GenericRow resp = tableDataManager.lookupRowByPrimaryKey(new PrimaryKey(new String[]{"SF"}));
+    assertNull(resp, "Response should be null if no segment is loaded");
+
+    tableDataManager.addSegment(_indexDir, _indexLoadingConfig);
+
+    // Confirm table is loaded and available for lookup
+    resp = tableDataManager.lookupRowByPrimaryKey(new PrimaryKey(new String[]{"SF"}));
+    assertNotNull(resp, "Should return response after segment load");
+    assertEquals(resp.getFieldToValueMap().size(), 2);
+    assertEquals(resp.getValue("teamID"), "SF");
+    assertEquals(resp.getValue("teamName"), "San Francisco Giants");
+
+    // Confirm we can get FieldSpec for loaded tables columns.
+    FieldSpec spec = tableDataManager.getColumnFieldSpec("teamName");
+    assertNotNull(spec, "Should return spec for existing column");
+    assertEquals(spec.getDataType(), DataType.STRING, "Should return correct data type for teamName column");
+
+    // Confirm we can read primary column list
+    List<String> pkColumns = tableDataManager.getPrimaryKeyColumns();
+    assertEquals(pkColumns, Collections.singletonList("teamID"), "Should return PK column list");
+
+    // Remove the segment
+    List<SegmentDataManager> segmentManagers = tableDataManager.acquireAllSegments(); // Not released in this test.
+    assertEquals(segmentManagers.size(), 1, "Should have exactly one segment manager");
+    SegmentDataManager segMgr = segmentManagers.get(0);
+    String segmentName = segMgr.getSegmentName();
+    tableDataManager.removeSegment(segmentName);
+    // confirm table is cleaned up
+    resp = tableDataManager.lookupRowByPrimaryKey(new PrimaryKey(new String[]{"SF"}));
+    assertNull(resp, "Response should be null if no segment is loaded");
+  }
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/DimensionTableConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/DimensionTableConfig.java
new file mode 100644
index 0000000000..b1f7a2bda0
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/DimensionTableConfig.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.config.table;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.pinot.spi.config.BaseJsonConfig;
+
+
+public class DimensionTableConfig extends BaseJsonConfig { // Dimension-table settings: one disablePreload flag.
+  private final boolean _disablePreload; // When true, the data manager builds a MemoryOptimizedDimensionTable.
+
+  @JsonCreator
+  public DimensionTableConfig(@JsonProperty(value = "disablePreload", required = true) boolean disablePreload) {
+    _disablePreload = disablePreload;
+  }
+
+  public boolean isDisablePreload() {
+    return _disablePreload;
+  }
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/TableConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/TableConfig.java
index 6c478b7995..44fbc7f5c3 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/TableConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/TableConfig.java
@@ -55,6 +55,7 @@ public class TableConfig extends BaseJsonConfig {
   public static final String FIELD_CONFIG_LIST_KEY = "fieldConfigList";
   public static final String UPSERT_CONFIG_KEY = "upsertConfig";
   public static final String DEDUP_CONFIG_KEY = "dedupConfig";
+  public static final String DIMENSION_TABLE_CONFIG_KEY = "dimensionTableConfig";
   public static final String INGESTION_CONFIG_KEY = "ingestionConfig";
   public static final String TIER_CONFIGS_LIST_KEY = "tierConfigs";
   public static final String TUNER_CONFIG_LIST_KEY = "tunerConfigs";
@@ -102,6 +103,9 @@ public class TableConfig extends BaseJsonConfig {
   @JsonPropertyDescription(value = "Dedup related config")
   private DedupConfig _dedupConfig;
 
+  @JsonPropertyDescription(value = "Dimension Table related config")
+  private DimensionTableConfig _dimensionTableConfig;
+
   @JsonPropertyDescription(value = "Config related to ingesting data into the table")
   private IngestionConfig _ingestionConfig;
 
@@ -128,6 +132,7 @@ public class TableConfig extends BaseJsonConfig {
       @JsonProperty(FIELD_CONFIG_LIST_KEY) @Nullable List<FieldConfig> fieldConfigList,
       @JsonProperty(UPSERT_CONFIG_KEY) @Nullable UpsertConfig upsertConfig,
       @JsonProperty(DEDUP_CONFIG_KEY) @Nullable DedupConfig dedupConfig,
+      @JsonProperty(DIMENSION_TABLE_CONFIG_KEY) @Nullable DimensionTableConfig dimensionTableConfig,
       @JsonProperty(INGESTION_CONFIG_KEY) @Nullable IngestionConfig ingestionConfig,
       @JsonProperty(TIER_CONFIGS_LIST_KEY) @Nullable List<TierConfig> tierConfigsList,
       @JsonProperty(IS_DIM_TABLE_KEY) boolean dimTable,
@@ -160,6 +165,7 @@ public class TableConfig extends BaseJsonConfig {
     _fieldConfigList = fieldConfigList;
     _upsertConfig = upsertConfig;
     _dedupConfig = dedupConfig;
+    _dimensionTableConfig = dimensionTableConfig;
     _ingestionConfig = ingestionConfig;
     _tierConfigsList = tierConfigsList;
     _dimTable = dimTable;
@@ -307,6 +313,15 @@ public class TableConfig extends BaseJsonConfig {
     _dedupConfig = dedupConfig;
   }
 
+  @Nullable
+  public DimensionTableConfig getDimensionTableConfig() { // NOTE(review): no @JsonProperty, unlike getIngestionConfig.
+    return _dimensionTableConfig;
+  }
+
+  public void setDimensionTableConfig(DimensionTableConfig dimensionTableConfig) { // Overwrites the stored config.
+    _dimensionTableConfig = dimensionTableConfig;
+  }
+
   @JsonProperty(INGESTION_CONFIG_KEY)
   @Nullable
   public IngestionConfig getIngestionConfig() {
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java
index ee9c227324..4bc86d2004 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.Map;
 import org.apache.pinot.spi.config.table.CompletionConfig;
 import org.apache.pinot.spi.config.table.DedupConfig;
+import org.apache.pinot.spi.config.table.DimensionTableConfig;
 import org.apache.pinot.spi.config.table.FieldConfig;
 import org.apache.pinot.spi.config.table.IndexingConfig;
 import org.apache.pinot.spi.config.table.QueryConfig;
@@ -115,6 +116,7 @@ public class TableConfigBuilder {
 
   private UpsertConfig _upsertConfig;
   private DedupConfig _dedupConfig;
+  private DimensionTableConfig _dimensionTableConfig;
   private IngestionConfig _ingestionConfig;
   private List<TierConfig> _tierConfigList;
   private List<TunerConfig> _tunerConfigList;
@@ -357,6 +359,11 @@ public class TableConfigBuilder {
     return this;
   }
 
+  public TableConfigBuilder setDimensionTableConfig(DimensionTableConfig dimensionTableConfig) { // Fluent setter.
+    _dimensionTableConfig = dimensionTableConfig; // Later passed to the TableConfig constructor.
+    return this;
+  }
+
   public TableConfigBuilder setPeerSegmentDownloadScheme(String peerSegmentDownloadScheme) {
     _peerSegmentDownloadScheme = peerSegmentDownloadScheme;
     return this;
@@ -439,7 +446,7 @@ public class TableConfigBuilder {
 
     return new TableConfig(_tableName, _tableType.toString(), validationConfig, tenantConfig, indexingConfig,
         _customConfig, _quotaConfig, _taskConfig, _routingConfig, _queryConfig, _instanceAssignmentConfigMap,
-        _fieldConfigList, _upsertConfig, _dedupConfig, _ingestionConfig, _tierConfigList, _isDimTable, _tunerConfigList,
-        _instancePartitionsMap, _segmentAssignmentConfigMap);
+        _fieldConfigList, _upsertConfig, _dedupConfig, _dimensionTableConfig, _ingestionConfig, _tierConfigList,
+        _isDimTable, _tunerConfigList, _instancePartitionsMap, _segmentAssignmentConfigMap);
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org