You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2022/09/26 17:51:25 UTC

[GitHub] [pinot] somandal commented on a diff in pull request #9454: ForwardIndexHandler: Change compressionType during segmentReload

somandal commented on code in PR #9454:
URL: https://github.com/apache/pinot/pull/9454#discussion_r980312726


##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java:
##########
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.loader;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.creator.IndexCreatorProvider;
+import org.apache.pinot.segment.spi.creator.SegmentVersion;
+import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Helper class used by {@link SegmentPreProcessor} to make changes to forward index and dictionary configs. Note
+ * that this handler only works if segment versions >= 3.0. The currently supported operations are:
+ * 1. Change compression on raw SV columns.
+ *
+ *  TODO: Add support for the following:
+ *  1. Change compression for raw MV columns
+ *  2. Enable dictionary
+ *  3. Disable dictionary
+ */
+public class ForwardIndexHandler implements IndexHandler {
+  private static final Logger LOGGER = LoggerFactory.getLogger(ForwardIndexHandler.class);
+
+  private final SegmentMetadata _segmentMetadata;
+  IndexLoadingConfig _indexLoadingConfig;
+
+  protected enum Operation {
+    CHANGE_RAW_COMPRESSION_TYPE,
+
+    // TODO: Add other operations like ENABLE_DICTIONARY, DISABLE_DICTIONARY.
+  }
+
+  public ForwardIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) {
+    _segmentMetadata = segmentMetadata;
+    _indexLoadingConfig = indexLoadingConfig;
+  }
+
+  @Override
+  public boolean needUpdateIndices(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentReader);
+    return !columnOperationMap.isEmpty();
+  }
+
+  @Override
+  public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentWriter);
+    if (columnOperationMap.isEmpty()) {
+      return;
+    }
+
+    for (Map.Entry<String, Operation> entry : columnOperationMap.entrySet()) {
+      String column = entry.getKey();
+      Operation operation = entry.getValue();
+
+      switch (operation) {
+        case CHANGE_RAW_COMPRESSION_TYPE:
+          rewriteRawForwardIndex(column, segmentWriter, indexCreatorProvider);
+          break;
+        // TODO: Add other operations here.
+        default:
+          break;
+      }
+    }
+  }
+
+  @VisibleForTesting
+  Map<String, Operation> computeOperation(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = new HashMap<>();
+
+    // Works only if the existing segment is V3.
+    if (_segmentMetadata.getVersion() != SegmentVersion.v3) {
+      return columnOperationMap;
+    }
+
+    // From existing column config.
+    Set<String> existingAllColumns = _segmentMetadata.getAllColumns();
+    Set<String> existingDictColumns =
+        segmentReader.toSegmentDirectory().getColumnsWithIndex(ColumnIndexType.DICTIONARY);
+    Set<String> existingNoDictColumns = new HashSet<>();
+    for (String column : existingAllColumns) {
+      if (!existingDictColumns.contains(column)) {
+        existingNoDictColumns.add(column);
+      }
+    }
+
+    // From new column config.
+    Set<String> newNoDictColumns = _indexLoadingConfig.getNoDictionaryColumns();
+
+    for (String column : existingAllColumns) {
+      if (existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // Both existing and new column is RAW forward index encoded. Check if compression needs to be changed.
+        if (shouldChangeCompressionType(column, segmentReader)) {
+          columnOperationMap.put(column, Operation.CHANGE_RAW_COMPRESSION_TYPE);
+        }
+      } else if (existingNoDictColumns.contains(column) && !newNoDictColumns.contains(column)) {
+        // TODO: Enable dictionary
+      } else if (!existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // TODO: Disable dictionary.
+      } else {

Review Comment:
   Technically, you won't need this `else` clause at all, even once support for adding/removing the dictionary is available, right? Do you need to add the empty `else` branch here now?



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java:
##########
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.loader;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.creator.IndexCreatorProvider;
+import org.apache.pinot.segment.spi.creator.SegmentVersion;
+import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Helper class used by {@link SegmentPreProcessor} to make changes to forward index and dictionary configs. Note
+ * that this handler only works if segment versions >= 3.0. The currently supported operations are:
+ * 1. Change compression on raw SV columns.
+ *
+ *  TODO: Add support for the following:
+ *  1. Change compression for raw MV columns
+ *  2. Enable dictionary
+ *  3. Disable dictionary
+ */
+public class ForwardIndexHandler implements IndexHandler {
+  private static final Logger LOGGER = LoggerFactory.getLogger(ForwardIndexHandler.class);
+
+  private final SegmentMetadata _segmentMetadata;
+  IndexLoadingConfig _indexLoadingConfig;
+
+  protected enum Operation {
+    CHANGE_RAW_COMPRESSION_TYPE,
+
+    // TODO: Add other operations like ENABLE_DICTIONARY, DISABLE_DICTIONARY.
+  }
+
+  public ForwardIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) {
+    _segmentMetadata = segmentMetadata;
+    _indexLoadingConfig = indexLoadingConfig;
+  }
+
+  @Override
+  public boolean needUpdateIndices(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentReader);
+    return !columnOperationMap.isEmpty();
+  }
+
+  @Override
+  public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentWriter);
+    if (columnOperationMap.isEmpty()) {
+      return;
+    }
+
+    for (Map.Entry<String, Operation> entry : columnOperationMap.entrySet()) {
+      String column = entry.getKey();
+      Operation operation = entry.getValue();
+
+      switch (operation) {
+        case CHANGE_RAW_COMPRESSION_TYPE:
+          rewriteRawForwardIndex(column, segmentWriter, indexCreatorProvider);
+          break;
+        // TODO: Add other operations here.
+        default:
+          break;
+      }
+    }
+  }
+
+  @VisibleForTesting
+  Map<String, Operation> computeOperation(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = new HashMap<>();
+
+    // Works only if the existing segment is V3.
+    if (_segmentMetadata.getVersion() != SegmentVersion.v3) {
+      return columnOperationMap;
+    }
+
+    // From existing column config.
+    Set<String> existingAllColumns = _segmentMetadata.getAllColumns();
+    Set<String> existingDictColumns =
+        segmentReader.toSegmentDirectory().getColumnsWithIndex(ColumnIndexType.DICTIONARY);
+    Set<String> existingNoDictColumns = new HashSet<>();
+    for (String column : existingAllColumns) {
+      if (!existingDictColumns.contains(column)) {
+        existingNoDictColumns.add(column);
+      }
+    }
+
+    // From new column config.
+    Set<String> newNoDictColumns = _indexLoadingConfig.getNoDictionaryColumns();
+
+    for (String column : existingAllColumns) {
+      if (existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // Both existing and new column is RAW forward index encoded. Check if compression needs to be changed.
+        if (shouldChangeCompressionType(column, segmentReader)) {
+          columnOperationMap.put(column, Operation.CHANGE_RAW_COMPRESSION_TYPE);
+        }
+      } else if (existingNoDictColumns.contains(column) && !newNoDictColumns.contains(column)) {
+        // TODO: Enable dictionary
+      } else if (!existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // TODO: Disable dictionary.
+      } else {
+        // No changes necessary.
+      }
+    }
+
+    return columnOperationMap;
+  }
+
+  private boolean shouldChangeCompressionType(String column, SegmentDirectory.Reader segmentReader) {
+    ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+
+    // TODO: Remove this MV column limitation.
+    if (!existingColMetadata.isSingleValue()) {
+      return false;
+    }
+
+    // The compression type for an existing segment can only be determined by reading the forward index header.
+    try {
+      ForwardIndexReader fwdIndexReader = LoaderUtils.getForwardIndexReader(segmentReader, existingColMetadata);
+      ChunkCompressionType existingCompressionType = fwdIndexReader.getCompressionType();
+      Preconditions.checkState(existingCompressionType != null,
+          "Existing compressionType cannot be null for raw forward index column=" + column);
+
+      // Get the new compression type.
+      ChunkCompressionType newCompressionType = null;
+      Map<String, ChunkCompressionType> newCompressionConfigs = _indexLoadingConfig.getCompressionConfigs();
+      if (newCompressionConfigs.containsKey(column)) {
+        newCompressionType = newCompressionConfigs.get(column);
+      }
+
+      // Note that default compression type (PASS_THROUGH for metric and LZ4 for dimension) is not considered if the
+      // compressionType is not explicitly provided in tableConfig. This is to avoid incorrectly rewriting the all
+      // forward indexes during segmentReload when the default compressionType changes.
+      if (newCompressionType == null || existingCompressionType == newCompressionType) {
+        return false;
+      }
+
+      return true;
+    } catch (Exception e) {
+      LOGGER.error("Caught exception while changing compression for column: {}", column, e);
+      return false;
+    }
+  }
+
+  private void rewriteRawForwardIndex(String column, SegmentDirectory.Writer segmentWriter,
+      IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Preconditions.checkState(_segmentMetadata.getVersion() == SegmentVersion.v3);
+
+    ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+    File indexDir = _segmentMetadata.getIndexDir();
+    String segmentName = _segmentMetadata.getName();
+    File inProgress = new File(indexDir, column + ".fwd.inprogress");
+    File fwdIndexFile = new File(indexDir, column + V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION);
+
+    if (!inProgress.exists()) {
+      // Marker file does not exist, which means last run ended normally.
+      // Create a marker file.
+      FileUtils.touch(inProgress);
+    } else {
+      // Marker file exists, which means last run gets interrupted.
+      // Remove inverted index if exists.
+      // For v1 and v2, it's the actual inverted index. For v3, it's the temporary inverted index.
+      FileUtils.deleteQuietly(fwdIndexFile);
+    }
+
+    LOGGER.info("Creating new forward index for segment={} and column={}", segmentName, column);
+
+    Map<String, ChunkCompressionType> compressionConfigs = _indexLoadingConfig.getCompressionConfigs();
+    Preconditions.checkState(compressionConfigs.containsKey(column));
+    // At this point, compressionConfigs is guaranteed to contain the column.

Review Comment:
   nit (minor): can you call out why this is guaranteed? I.e., that for an existing column the field config must be explicitly updated to reflect a codec change, so that the default compression type is never picked up here?



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java:
##########
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.loader;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.creator.IndexCreatorProvider;
+import org.apache.pinot.segment.spi.creator.SegmentVersion;
+import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Helper class used by {@link SegmentPreProcessor} to make changes to forward index and dictionary configs. Note
+ * that this handler only works if segment versions >= 3.0. The currently supported operations are:
+ * 1. Change compression on raw SV columns.
+ *
+ *  TODO: Add support for the following:
+ *  1. Change compression for raw MV columns
+ *  2. Enable dictionary
+ *  3. Disable dictionary
+ */
+public class ForwardIndexHandler implements IndexHandler {
+  private static final Logger LOGGER = LoggerFactory.getLogger(ForwardIndexHandler.class);
+
+  private final SegmentMetadata _segmentMetadata;
+  IndexLoadingConfig _indexLoadingConfig;
+
+  protected enum Operation {
+    CHANGE_RAW_COMPRESSION_TYPE,
+
+    // TODO: Add other operations like ENABLE_DICTIONARY, DISABLE_DICTIONARY.
+  }
+
+  public ForwardIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) {
+    _segmentMetadata = segmentMetadata;
+    _indexLoadingConfig = indexLoadingConfig;
+  }
+
+  @Override
+  public boolean needUpdateIndices(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentReader);
+    return !columnOperationMap.isEmpty();
+  }
+
+  @Override
+  public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentWriter);
+    if (columnOperationMap.isEmpty()) {
+      return;
+    }
+
+    for (Map.Entry<String, Operation> entry : columnOperationMap.entrySet()) {
+      String column = entry.getKey();
+      Operation operation = entry.getValue();
+
+      switch (operation) {
+        case CHANGE_RAW_COMPRESSION_TYPE:
+          rewriteRawForwardIndex(column, segmentWriter, indexCreatorProvider);
+          break;
+        // TODO: Add other operations here.
+        default:
+          break;
+      }
+    }
+  }
+
+  @VisibleForTesting
+  Map<String, Operation> computeOperation(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = new HashMap<>();
+
+    // Works only if the existing segment is V3.
+    if (_segmentMetadata.getVersion() != SegmentVersion.v3) {
+      return columnOperationMap;
+    }
+
+    // From existing column config.
+    Set<String> existingAllColumns = _segmentMetadata.getAllColumns();
+    Set<String> existingDictColumns =
+        segmentReader.toSegmentDirectory().getColumnsWithIndex(ColumnIndexType.DICTIONARY);
+    Set<String> existingNoDictColumns = new HashSet<>();
+    for (String column : existingAllColumns) {
+      if (!existingDictColumns.contains(column)) {
+        existingNoDictColumns.add(column);
+      }
+    }
+
+    // From new column config.
+    Set<String> newNoDictColumns = _indexLoadingConfig.getNoDictionaryColumns();
+
+    for (String column : existingAllColumns) {
+      if (existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // Both existing and new column is RAW forward index encoded. Check if compression needs to be changed.
+        if (shouldChangeCompressionType(column, segmentReader)) {
+          columnOperationMap.put(column, Operation.CHANGE_RAW_COMPRESSION_TYPE);
+        }
+      } else if (existingNoDictColumns.contains(column) && !newNoDictColumns.contains(column)) {
+        // TODO: Enable dictionary
+      } else if (!existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // TODO: Disable dictionary.
+      } else {
+        // No changes necessary.
+      }
+    }
+
+    return columnOperationMap;
+  }
+
+  private boolean shouldChangeCompressionType(String column, SegmentDirectory.Reader segmentReader) {
+    ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+
+    // TODO: Remove this MV column limitation.
+    if (!existingColMetadata.isSingleValue()) {
+      return false;
+    }
+
+    // The compression type for an existing segment can only be determined by reading the forward index header.
+    try {
+      ForwardIndexReader fwdIndexReader = LoaderUtils.getForwardIndexReader(segmentReader, existingColMetadata);
+      ChunkCompressionType existingCompressionType = fwdIndexReader.getCompressionType();
+      Preconditions.checkState(existingCompressionType != null,
+          "Existing compressionType cannot be null for raw forward index column=" + column);
+
+      // Get the new compression type.
+      ChunkCompressionType newCompressionType = null;
+      Map<String, ChunkCompressionType> newCompressionConfigs = _indexLoadingConfig.getCompressionConfigs();
+      if (newCompressionConfigs.containsKey(column)) {
+        newCompressionType = newCompressionConfigs.get(column);
+      }
+
+      // Note that default compression type (PASS_THROUGH for metric and LZ4 for dimension) is not considered if the
+      // compressionType is not explicitly provided in tableConfig. This is to avoid incorrectly rewriting the all
+      // forward indexes during segmentReload when the default compressionType changes.
+      if (newCompressionType == null || existingCompressionType == newCompressionType) {
+        return false;
+      }
+
+      return true;
+    } catch (Exception e) {
+      LOGGER.error("Caught exception while changing compression for column: {}", column, e);

Review Comment:
   nit: reword the message: "Caught exception while assessing whether the compression type has changed for column: "



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java:
##########
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.loader;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.creator.IndexCreatorProvider;
+import org.apache.pinot.segment.spi.creator.SegmentVersion;
+import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Helper class used by {@link SegmentPreProcessor} to make changes to forward index and dictionary configs. Note
+ * that this handler only works if segment versions >= 3.0. The currently supported operations are:

Review Comment:
   Is there a plan to add support for columns in v1 segments as well in the future? If so, could you add it to the TODO list? If not, is there a specific reason why not?



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java:
##########
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.loader;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.creator.IndexCreatorProvider;
+import org.apache.pinot.segment.spi.creator.SegmentVersion;
+import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Helper class used by {@link SegmentPreProcessor} to make changes to forward index and dictionary configs. Note
+ * that this handler only works if segment versions >= 3.0. The currently supported operations are:
+ * 1. Change compression on raw SV columns.
+ *
+ *  TODO: Add support for the following:
+ *  1. Change compression for raw MV columns
+ *  2. Enable dictionary
+ *  3. Disable dictionary
+ */
+public class ForwardIndexHandler implements IndexHandler {
+  private static final Logger LOGGER = LoggerFactory.getLogger(ForwardIndexHandler.class);
+
+  private final SegmentMetadata _segmentMetadata;
+  IndexLoadingConfig _indexLoadingConfig;
+
+  protected enum Operation {
+    CHANGE_RAW_COMPRESSION_TYPE,
+
+    // TODO: Add other operations like ENABLE_DICTIONARY, DISABLE_DICTIONARY.
+  }
+
+  public ForwardIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) {
+    _segmentMetadata = segmentMetadata;
+    _indexLoadingConfig = indexLoadingConfig;
+  }
+
+  @Override
+  public boolean needUpdateIndices(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentReader);
+    return !columnOperationMap.isEmpty();
+  }
+
+  @Override
+  public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentWriter);
+    if (columnOperationMap.isEmpty()) {
+      return;
+    }
+
+    for (Map.Entry<String, Operation> entry : columnOperationMap.entrySet()) {
+      String column = entry.getKey();
+      Operation operation = entry.getValue();
+
+      switch (operation) {
+        case CHANGE_RAW_COMPRESSION_TYPE:
+          rewriteRawForwardIndex(column, segmentWriter, indexCreatorProvider);
+          break;
+        // TODO: Add other operations here.
+        default:
+          break;

Review Comment:
   Throw an exception here for the default case? Any operations we don't support should error out, including ones we don't have support for yet.



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java:
##########
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.loader;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.creator.IndexCreatorProvider;
+import org.apache.pinot.segment.spi.creator.SegmentVersion;
+import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Helper class used by {@link SegmentPreProcessor} to make changes to forward index and dictionary configs. Note
+ * that this handler only works if segment versions >= 3.0. The currently supported operations are:
+ * 1. Change compression on raw SV columns.
+ *
+ *  TODO: Add support for the following:
+ *  1. Change compression for raw MV columns
+ *  2. Enable dictionary
+ *  3. Disable dictionary
+ */
+public class ForwardIndexHandler implements IndexHandler {
+  private static final Logger LOGGER = LoggerFactory.getLogger(ForwardIndexHandler.class);
+
+  private final SegmentMetadata _segmentMetadata;
+  IndexLoadingConfig _indexLoadingConfig;
+
+  protected enum Operation {
+    CHANGE_RAW_COMPRESSION_TYPE,
+
+    // TODO: Add other operations like ENABLE_DICTIONARY, DISABLE_DICTIONARY.
+  }
+
+  public ForwardIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) {
+    _segmentMetadata = segmentMetadata;
+    _indexLoadingConfig = indexLoadingConfig;
+  }
+
+  @Override
+  public boolean needUpdateIndices(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentReader);
+    return !columnOperationMap.isEmpty();
+  }
+
+  @Override
+  public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentWriter);
+    if (columnOperationMap.isEmpty()) {
+      return;
+    }
+
+    for (Map.Entry<String, Operation> entry : columnOperationMap.entrySet()) {
+      String column = entry.getKey();
+      Operation operation = entry.getValue();
+
+      switch (operation) {
+        case CHANGE_RAW_COMPRESSION_TYPE:
+          rewriteRawForwardIndex(column, segmentWriter, indexCreatorProvider);
+          break;
+        // TODO: Add other operations here.
+        default:
+          break;
+      }
+    }
+  }
+
+  @VisibleForTesting
+  Map<String, Operation> computeOperation(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = new HashMap<>();
+
+    // Works only if the existing segment is V3.
+    if (_segmentMetadata.getVersion() != SegmentVersion.v3) {
+      return columnOperationMap;
+    }
+
+    // From existing column config.
+    Set<String> existingAllColumns = _segmentMetadata.getAllColumns();
+    Set<String> existingDictColumns =
+        segmentReader.toSegmentDirectory().getColumnsWithIndex(ColumnIndexType.DICTIONARY);
+    Set<String> existingNoDictColumns = new HashSet<>();
+    for (String column : existingAllColumns) {
+      if (!existingDictColumns.contains(column)) {
+        existingNoDictColumns.add(column);
+      }
+    }
+
+    // From new column config.
+    Set<String> newNoDictColumns = _indexLoadingConfig.getNoDictionaryColumns();
+
+    for (String column : existingAllColumns) {
+      if (existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // Both existing and new column is RAW forward index encoded. Check if compression needs to be changed.
+        if (shouldChangeCompressionType(column, segmentReader)) {
+          columnOperationMap.put(column, Operation.CHANGE_RAW_COMPRESSION_TYPE);
+        }
+      } else if (existingNoDictColumns.contains(column) && !newNoDictColumns.contains(column)) {
+        // TODO: Enable dictionary
+      } else if (!existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // TODO: Disable dictionary.
+      } else {
+        // No changes necessary.
+      }
+    }
+
+    return columnOperationMap;
+  }
+
+  private boolean shouldChangeCompressionType(String column, SegmentDirectory.Reader segmentReader) {
+    ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+
+    // TODO: Remove this MV column limitation.
+    if (!existingColMetadata.isSingleValue()) {
+      return false;
+    }
+
+    // The compression type for an existing segment can only be determined by reading the forward index header.
+    try {
+      ForwardIndexReader fwdIndexReader = LoaderUtils.getForwardIndexReader(segmentReader, existingColMetadata);
+      ChunkCompressionType existingCompressionType = fwdIndexReader.getCompressionType();
+      Preconditions.checkState(existingCompressionType != null,
+          "Existing compressionType cannot be null for raw forward index column=" + column);
+
+      // Get the new compression type.
+      ChunkCompressionType newCompressionType = null;
+      Map<String, ChunkCompressionType> newCompressionConfigs = _indexLoadingConfig.getCompressionConfigs();
+      if (newCompressionConfigs.containsKey(column)) {
+        newCompressionType = newCompressionConfigs.get(column);
+      }
+
+      // Note that default compression type (PASS_THROUGH for metric and LZ4 for dimension) is not considered if the
+      // compressionType is not explicitly provided in tableConfig. This is to avoid incorrectly rewriting the all
+      // forward indexes during segmentReload when the default compressionType changes.
+      if (newCompressionType == null || existingCompressionType == newCompressionType) {
+        return false;
+      }
+
+      return true;
+    } catch (Exception e) {
+      LOGGER.error("Caught exception while changing compression for column: {}", column, e);
+      return false;
+    }
+  }
+
+  private void rewriteRawForwardIndex(String column, SegmentDirectory.Writer segmentWriter,
+      IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Preconditions.checkState(_segmentMetadata.getVersion() == SegmentVersion.v3);
+
+    ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+    File indexDir = _segmentMetadata.getIndexDir();
+    String segmentName = _segmentMetadata.getName();
+    File inProgress = new File(indexDir, column + ".fwd.inprogress");
+    File fwdIndexFile = new File(indexDir, column + V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION);
+
+    if (!inProgress.exists()) {
+      // Marker file does not exist, which means last run ended normally.
+      // Create a marker file.
+      FileUtils.touch(inProgress);
+    } else {
+      // Marker file exists, which means last run gets interrupted.
+      // Remove inverted index if exists.

Review Comment:
   nit: inverted index -> forward index



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessor.java:
##########
@@ -104,8 +104,8 @@ public void process()
       // Update single-column indices, like inverted index, json index etc.
       IndexCreatorProvider indexCreatorProvider = IndexingOverrides.getIndexCreatorProvider();
       for (ColumnIndexType type : ColumnIndexType.values()) {
-        IndexHandlerFactory.getIndexHandler(type, _segmentMetadata, _indexLoadingConfig)
-            .updateIndices(segmentWriter, indexCreatorProvider);
+        IndexHandler handler = IndexHandlerFactory.getIndexHandler(type, _segmentMetadata, _indexLoadingConfig);
+        handler.updateIndices(segmentWriter, indexCreatorProvider);

Review Comment:
   Just curious: does the forward index handler need to run before the other index handlers are called? I think the range index will need to be rewritten based on the compression codec change, right?



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java:
##########
@@ -215,6 +218,26 @@ private void extractFromTableConfig(TableConfig tableConfig) {
     }
   }
 
+  /**
+   * Extracts compressionType for each column. Populates a map containing column name as key and compression type as
+   * value. Note that only RAW forward index columns will be populated in this map.

Review Comment:
   I'd recommend adding a short comment here stating that this map only contains the overrides for the compression type and does not include the default values for METRIC (PASS_THROUGH) or DIMENSION (LZ4) no-dictionary columns.
   
   Perhaps add a note about `getColumnCompressionType()` in `SegmentColumnarIndexCreator` setting up the defaults if the `fieldConfig` doesn't contain the compression codec.
   
   Just want to ensure that in the future no one uses this assuming that all compression types are extracted and defaults are filled in for the compression types.



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java:
##########
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.loader;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.creator.IndexCreatorProvider;
+import org.apache.pinot.segment.spi.creator.SegmentVersion;
+import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Helper class used by {@link SegmentPreProcessor} to make changes to forward index and dictionary configs. Note
+ * that this handler only works if segment versions >= 3.0. The currently supported operations are:
+ * 1. Change compression on raw SV columns.
+ *
+ *  TODO: Add support for the following:
+ *  1. Change compression for raw MV columns
+ *  2. Enable dictionary
+ *  3. Disable dictionary
+ */
+public class ForwardIndexHandler implements IndexHandler {
+  private static final Logger LOGGER = LoggerFactory.getLogger(ForwardIndexHandler.class);
+
+  private final SegmentMetadata _segmentMetadata;
+  IndexLoadingConfig _indexLoadingConfig;
+
+  protected enum Operation {
+    CHANGE_RAW_COMPRESSION_TYPE,
+
+    // TODO: Add other operations like ENABLE_DICTIONARY, DISABLE_DICTIONARY.
+  }
+
+  public ForwardIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) {
+    _segmentMetadata = segmentMetadata;
+    _indexLoadingConfig = indexLoadingConfig;
+  }
+
+  @Override
+  public boolean needUpdateIndices(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentReader);
+    return !columnOperationMap.isEmpty();
+  }
+
+  @Override
+  public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentWriter);
+    if (columnOperationMap.isEmpty()) {
+      return;
+    }
+
+    for (Map.Entry<String, Operation> entry : columnOperationMap.entrySet()) {
+      String column = entry.getKey();
+      Operation operation = entry.getValue();
+
+      switch (operation) {
+        case CHANGE_RAW_COMPRESSION_TYPE:
+          rewriteRawForwardIndex(column, segmentWriter, indexCreatorProvider);
+          break;
+        // TODO: Add other operations here.
+        default:
+          break;
+      }
+    }
+  }
+
+  @VisibleForTesting
+  Map<String, Operation> computeOperation(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = new HashMap<>();
+
+    // Works only if the existing segment is V3.
+    if (_segmentMetadata.getVersion() != SegmentVersion.v3) {
+      return columnOperationMap;
+    }
+
+    // From existing column config.
+    Set<String> existingAllColumns = _segmentMetadata.getAllColumns();
+    Set<String> existingDictColumns =
+        segmentReader.toSegmentDirectory().getColumnsWithIndex(ColumnIndexType.DICTIONARY);
+    Set<String> existingNoDictColumns = new HashSet<>();
+    for (String column : existingAllColumns) {
+      if (!existingDictColumns.contains(column)) {
+        existingNoDictColumns.add(column);
+      }
+    }
+
+    // From new column config.
+    Set<String> newNoDictColumns = _indexLoadingConfig.getNoDictionaryColumns();
+
+    for (String column : existingAllColumns) {
+      if (existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // Both existing and new column is RAW forward index encoded. Check if compression needs to be changed.
+        if (shouldChangeCompressionType(column, segmentReader)) {
+          columnOperationMap.put(column, Operation.CHANGE_RAW_COMPRESSION_TYPE);
+        }
+      } else if (existingNoDictColumns.contains(column) && !newNoDictColumns.contains(column)) {
+        // TODO: Enable dictionary
+      } else if (!existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // TODO: Disable dictionary.
+      } else {
+        // No changes necessary.
+      }
+    }
+
+    return columnOperationMap;
+  }
+
+  private boolean shouldChangeCompressionType(String column, SegmentDirectory.Reader segmentReader) {
+    ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+
+    // TODO: Remove this MV column limitation.
+    if (!existingColMetadata.isSingleValue()) {
+      return false;
+    }
+
+    // The compression type for an existing segment can only be determined by reading the forward index header.
+    try {
+      ForwardIndexReader fwdIndexReader = LoaderUtils.getForwardIndexReader(segmentReader, existingColMetadata);
+      ChunkCompressionType existingCompressionType = fwdIndexReader.getCompressionType();
+      Preconditions.checkState(existingCompressionType != null,
+          "Existing compressionType cannot be null for raw forward index column=" + column);
+
+      // Get the new compression type.
+      ChunkCompressionType newCompressionType = null;
+      Map<String, ChunkCompressionType> newCompressionConfigs = _indexLoadingConfig.getCompressionConfigs();
+      if (newCompressionConfigs.containsKey(column)) {
+        newCompressionType = newCompressionConfigs.get(column);
+      }
+
+      // Note that default compression type (PASS_THROUGH for metric and LZ4 for dimension) is not considered if the
+      // compressionType is not explicitly provided in tableConfig. This is to avoid incorrectly rewriting the all
+      // forward indexes during segmentReload when the default compressionType changes.
+      if (newCompressionType == null || existingCompressionType == newCompressionType) {
+        return false;
+      }
+
+      return true;
+    } catch (Exception e) {
+      LOGGER.error("Caught exception while changing compression for column: {}", column, e);
+      return false;
+    }
+  }
+
+  private void rewriteRawForwardIndex(String column, SegmentDirectory.Writer segmentWriter,
+      IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Preconditions.checkState(_segmentMetadata.getVersion() == SegmentVersion.v3);
+
+    ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+    File indexDir = _segmentMetadata.getIndexDir();
+    String segmentName = _segmentMetadata.getName();
+    File inProgress = new File(indexDir, column + ".fwd.inprogress");
+    File fwdIndexFile = new File(indexDir, column + V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION);
+
+    if (!inProgress.exists()) {
+      // Marker file does not exist, which means last run ended normally.
+      // Create a marker file.
+      FileUtils.touch(inProgress);
+    } else {
+      // Marker file exists, which means last run gets interrupted.
+      // Remove inverted index if exists.
+      // For v1 and v2, it's the actual inverted index. For v3, it's the temporary inverted index.

Review Comment:
   nit: inverted index -> forward index?
   we're only handling v3 here, right? update the comment to reflect that?



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java:
##########
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.index.loader;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.creator.IndexCreatorProvider;
+import org.apache.pinot.segment.spi.creator.SegmentVersion;
+import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator;
+import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
+import org.apache.pinot.segment.spi.store.ColumnIndexType;
+import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Helper class used by {@link SegmentPreProcessor} to make changes to forward index and dictionary configs. Note
+ * that this handler only works if segment versions >= 3.0. The currently supported operations are:
+ * 1. Change compression on raw SV columns.
+ *
+ *  TODO: Add support for the following:
+ *  1. Change compression for raw MV columns
+ *  2. Enable dictionary
+ *  3. Disable dictionary
+ */
+public class ForwardIndexHandler implements IndexHandler {
+  private static final Logger LOGGER = LoggerFactory.getLogger(ForwardIndexHandler.class);
+
+  private final SegmentMetadata _segmentMetadata;
+  IndexLoadingConfig _indexLoadingConfig;
+
+  protected enum Operation {
+    CHANGE_RAW_COMPRESSION_TYPE,
+
+    // TODO: Add other operations like ENABLE_DICTIONARY, DISABLE_DICTIONARY.
+  }
+
+  public ForwardIndexHandler(SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig) {
+    _segmentMetadata = segmentMetadata;
+    _indexLoadingConfig = indexLoadingConfig;
+  }
+
+  @Override
+  public boolean needUpdateIndices(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentReader);
+    return !columnOperationMap.isEmpty();
+  }
+
+  @Override
+  public void updateIndices(SegmentDirectory.Writer segmentWriter, IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Map<String, Operation> columnOperationMap = computeOperation(segmentWriter);
+    if (columnOperationMap.isEmpty()) {
+      return;
+    }
+
+    for (Map.Entry<String, Operation> entry : columnOperationMap.entrySet()) {
+      String column = entry.getKey();
+      Operation operation = entry.getValue();
+
+      switch (operation) {
+        case CHANGE_RAW_COMPRESSION_TYPE:
+          rewriteRawForwardIndex(column, segmentWriter, indexCreatorProvider);
+          break;
+        // TODO: Add other operations here.
+        default:
+          break;
+      }
+    }
+  }
+
+  @VisibleForTesting
+  Map<String, Operation> computeOperation(SegmentDirectory.Reader segmentReader) {
+    Map<String, Operation> columnOperationMap = new HashMap<>();
+
+    // Works only if the existing segment is V3.
+    if (_segmentMetadata.getVersion() != SegmentVersion.v3) {
+      return columnOperationMap;
+    }
+
+    // From existing column config.
+    Set<String> existingAllColumns = _segmentMetadata.getAllColumns();
+    Set<String> existingDictColumns =
+        segmentReader.toSegmentDirectory().getColumnsWithIndex(ColumnIndexType.DICTIONARY);
+    Set<String> existingNoDictColumns = new HashSet<>();
+    for (String column : existingAllColumns) {
+      if (!existingDictColumns.contains(column)) {
+        existingNoDictColumns.add(column);
+      }
+    }
+
+    // From new column config.
+    Set<String> newNoDictColumns = _indexLoadingConfig.getNoDictionaryColumns();
+
+    for (String column : existingAllColumns) {
+      if (existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // Both existing and new column is RAW forward index encoded. Check if compression needs to be changed.
+        if (shouldChangeCompressionType(column, segmentReader)) {
+          columnOperationMap.put(column, Operation.CHANGE_RAW_COMPRESSION_TYPE);
+        }
+      } else if (existingNoDictColumns.contains(column) && !newNoDictColumns.contains(column)) {
+        // TODO: Enable dictionary
+      } else if (!existingNoDictColumns.contains(column) && newNoDictColumns.contains(column)) {
+        // TODO: Disable dictionary.
+      } else {
+        // No changes necessary.
+      }
+    }
+
+    return columnOperationMap;
+  }
+
+  private boolean shouldChangeCompressionType(String column, SegmentDirectory.Reader segmentReader) {
+    ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+
+    // TODO: Remove this MV column limitation.
+    if (!existingColMetadata.isSingleValue()) {
+      return false;
+    }
+
+    // The compression type for an existing segment can only be determined by reading the forward index header.
+    try {
+      ForwardIndexReader fwdIndexReader = LoaderUtils.getForwardIndexReader(segmentReader, existingColMetadata);
+      ChunkCompressionType existingCompressionType = fwdIndexReader.getCompressionType();
+      Preconditions.checkState(existingCompressionType != null,
+          "Existing compressionType cannot be null for raw forward index column=" + column);
+
+      // Get the new compression type.
+      ChunkCompressionType newCompressionType = null;
+      Map<String, ChunkCompressionType> newCompressionConfigs = _indexLoadingConfig.getCompressionConfigs();
+      if (newCompressionConfigs.containsKey(column)) {
+        newCompressionType = newCompressionConfigs.get(column);
+      }
+
+      // Note that default compression type (PASS_THROUGH for metric and LZ4 for dimension) is not considered if the
+      // compressionType is not explicitly provided in tableConfig. This is to avoid incorrectly rewriting the all
+      // forward indexes during segmentReload when the default compressionType changes.
+      if (newCompressionType == null || existingCompressionType == newCompressionType) {
+        return false;
+      }
+
+      return true;
+    } catch (Exception e) {
+      LOGGER.error("Caught exception while changing compression for column: {}", column, e);
+      return false;
+    }
+  }
+
+  private void rewriteRawForwardIndex(String column, SegmentDirectory.Writer segmentWriter,
+      IndexCreatorProvider indexCreatorProvider)
+      throws Exception {
+    Preconditions.checkState(_segmentMetadata.getVersion() == SegmentVersion.v3);
+
+    ColumnMetadata existingColMetadata = _segmentMetadata.getColumnMetadataFor(column);
+    File indexDir = _segmentMetadata.getIndexDir();
+    String segmentName = _segmentMetadata.getName();
+    File inProgress = new File(indexDir, column + ".fwd.inprogress");
+    File fwdIndexFile = new File(indexDir, column + V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION);
+
+    if (!inProgress.exists()) {
+      // Marker file does not exist, which means last run ended normally.
+      // Create a marker file.
+      FileUtils.touch(inProgress);
+    } else {
+      // Marker file exists, which means last run gets interrupted.
+      // Remove inverted index if exists.
+      // For v1 and v2, it's the actual inverted index. For v3, it's the temporary inverted index.
+      FileUtils.deleteQuietly(fwdIndexFile);
+    }
+
+    LOGGER.info("Creating new forward index for segment={} and column={}", segmentName, column);
+
+    Map<String, ChunkCompressionType> compressionConfigs = _indexLoadingConfig.getCompressionConfigs();
+    Preconditions.checkState(compressionConfigs.containsKey(column));
+    // At this point, compressionConfigs is guaranteed to contain the column.
+    ChunkCompressionType newCompressionType = compressionConfigs.get(column);
+
+    int numDocs = existingColMetadata.getTotalDocs();
+
+    try (ForwardIndexReader reader = LoaderUtils.getForwardIndexReader(segmentWriter, existingColMetadata)) {
+      int lengthOfLongestEntry = reader.getLengthOfLongestEntry();
+      Preconditions.checkState(lengthOfLongestEntry >= 0,
+          "lengthOfLongestEntry cannot be negative. segment=" + segmentName + " column={}" + column);
+
+      IndexCreationContext.Forward context =
+          IndexCreationContext.builder().withIndexDir(indexDir).withColumnMetadata(existingColMetadata)
+              .withLengthOfLongestEntry(lengthOfLongestEntry).build()
+              .forForwardIndex(newCompressionType, _indexLoadingConfig.getColumnProperties());
+
+      try (ForwardIndexCreator creator = indexCreatorProvider.newForwardIndexCreator(context)) {
+        PinotSegmentColumnReader columnReader =
+            new PinotSegmentColumnReader(reader, null, null, existingColMetadata.getMaxNumberOfMultiValues());
+
+        for (int i = 0; i < numDocs; i++) {
+          Object val = columnReader.getValue(i);
+
+          // JSON fields are either stored as string or bytes. No special handling is needed.

Review Comment:
   Just curious, in the `SegmentColumnarIndexCreator` I see the following for JSON handling:
   
   ```
               case JSON:
                 if (columnValueToIndex instanceof String) {
                   forwardIndexCreator.putString((String) columnValueToIndex);
                 } else if (columnValueToIndex instanceof byte[]) {
                   forwardIndexCreator.putBytes((byte[]) columnValueToIndex);
                 }
                 break;
   ```
   
   Do we not need to do the same here? 
   
   From checking the code it looks like the forward index creator is correctly set up with the stored type: `context.getFieldSpec().getDataType().getStoredType()` so maybe this is indeed not a concern here. Just asking to double check this part.
   
   Not needed for this PR, but we should perhaps clean up this JSON handling part in `SegmentColumnarIndexCreator` as well to avoid confusion.



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java:
##########
@@ -424,6 +447,16 @@ public Set<String> getNoDictionaryColumns() {
     return _noDictionaryColumns;
   }
 
+  /**
+   * Populates a map containing column name as key and compression type as value. Note that only RAW forward index
+   * columns will be populated in this map.
+   *
+   * @return a map containing column name as key and compressionType as value.

Review Comment:
   nit: perhaps update the comment here too to indicate that default value columns for compression codec won't be included.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org