You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2021/09/03 20:47:14 UTC

[pinot] branch master updated: add more integ test cases for index removal and stronger assertion (#7385)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 8106b69  add more integ test cases for index removal and stronger assertion (#7385)
8106b69 is described below

commit 8106b69a823fa525ca3450d747b0eb315ec0a67b
Author: Xiaobing <61...@users.noreply.github.com>
AuthorDate: Fri Sep 3 13:45:38 2021 -0700

    add more integ test cases for index removal and stronger assertion (#7385)
---
 .../MultiNodesOfflineClusterIntegrationTest.java   |  14 ---
 .../tests/OfflineClusterIntegrationTest.java       | 118 +++++++++++++--------
 .../defaultcolumn/BaseDefaultColumnHandler.java    |   8 --
 .../spi/creator/SegmentGeneratorConfig.java        |  13 ++-
 4 files changed, 79 insertions(+), 74 deletions(-)

diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiNodesOfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiNodesOfflineClusterIntegrationTest.java
index 526256d..c2ed6b8 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiNodesOfflineClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiNodesOfflineClusterIntegrationTest.java
@@ -43,20 +43,6 @@ public class MultiNodesOfflineClusterIntegrationTest extends OfflineClusterInteg
     startServers(NUM_SERVERS);
   }
 
-  // Disabled because with multiple servers, there is no guarantee that all servers get all segments reloaded
-  @Test(enabled = false)
-  @Override
-  public void testStarTreeTriggering() {
-    // Ignored
-  }
-
-  // Disabled because with multiple servers, there is no guarantee that all servers get all segments reloaded
-  @Test(enabled = false)
-  @Override
-  public void testDefaultColumns() {
-    // Ignored
-  }
-
   // Disabled because gRPC query server is not enabled
   @Test(enabled = false)
   @Override
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index 5a8af1f..7dfc937 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -122,15 +122,18 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
   private static final String NUM_ROWS_KEY = "numRows";
   private static final String COLUMN_LENGTH_MAP_KEY = "columnLengthMap";
   private static final String COLUMN_CARDINALITY_MAP_KEY = "columnCardinalityMap";
-  // V1 format takes 20270480 bytes. Below is what V3 format takes.
   // TODO: This might lead to flaky test, as this disk size is not deterministic
-  // as it depends on the iteration order of a HashSet.
-  private static final int DISK_SIZE_IN_BYTES = 20444064;
+  //       as it depends on the iteration order of a HashSet.
+  private static final int DISK_SIZE_IN_BYTES = 20443680;
   private static final int NUM_ROWS = 115545;
 
   private final List<ServiceStatus.ServiceStatusCallback> _serviceStatusCallbacks =
       new ArrayList<>(getNumBrokers() + getNumServers());
   private String _schemaFileName = DEFAULT_SCHEMA_FILE_NAME;
+  // Cache the table size after removing an index via reloading. Once this value
+  // is set, assert that table size always gets back to this value after removing
+  // any other kind of index.
+  private long _tableSizeAfterRemovingIndex;
 
   protected int getNumBrokers() {
     return NUM_BROKERS;
@@ -321,28 +324,23 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
     }
   }
 
-  @Test
+  @Test(dependsOnMethods = "testRangeIndexTriggering")
   public void testInvertedIndexTriggering()
       throws Exception {
-    String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(getTableName());
     long numTotalDocs = getCountStarResult();
-    long tableSizeWithDefaultIndex = getTableSize(offlineTableName);
 
     // Without index on DivActualElapsedTime, all docs are scanned at filtering stage.
     assertEquals(postQuery(TEST_UPDATED_INVERTED_INDEX_QUERY).get("numEntriesScannedInFilter").asLong(), numTotalDocs);
 
-    // Add inverted index, and the table size gets larger.
     addInvertedIndex();
-    long tableSizeWithNewIndex = getTableSize(offlineTableName);
-    assertTrue(tableSizeWithNewIndex > tableSizeWithDefaultIndex);
+    long tableSizeWithNewIndex = getTableSize(getTableName());
 
-    // TODO: test index removal like this for other index types.
     // Update table config to remove the new inverted index, and
     // reload table to clean the new inverted indices physically.
     TableConfig tableConfig = getOfflineTableConfig();
     tableConfig.getIndexingConfig().setInvertedIndexColumns(getInvertedIndexColumns());
     updateTableConfig(tableConfig);
-    reloadOfflineTable(offlineTableName);
+    reloadOfflineTable(getTableName());
     TestUtils.waitForCondition(aVoid -> {
       try {
         JsonNode queryResponse = postQuery(TEST_UPDATED_INVERTED_INDEX_QUERY);
@@ -354,18 +352,12 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
         throw new RuntimeException(e);
       }
     }, 600_000L, "Failed to cleanup obsolete index");
-    // The table size after removing the index might not get back the original one,
-    // i.e. tableSizeWithDefaultIndex. Because entries in index_map file are reordered,
-    // and the file might get a different size. tableSizeAfterRemovingIndex should be
-    // close to tableSizeWithDefaultIndex, but their relationship is not deterministic.
-    long tableSizeAfterRemovingIndex = getTableSize(offlineTableName);
-    assertTrue(tableSizeAfterRemovingIndex < tableSizeWithNewIndex);
+    assertEquals(getTableSize(getTableName()), _tableSizeAfterRemovingIndex);
 
     // Add the inverted index back to test index removal via force download.
     addInvertedIndex();
-    long tableSizeAfterAddingIndexAgain = getTableSize(offlineTableName);
-    assertTrue(tableSizeAfterAddingIndexAgain > tableSizeWithDefaultIndex);
-    assertTrue(tableSizeAfterAddingIndexAgain > tableSizeAfterRemovingIndex);
+    long tableSizeAfterAddingIndexAgain = getTableSize(getTableName());
+    assertEquals(tableSizeAfterAddingIndexAgain, tableSizeWithNewIndex);
 
     // Update table config to remove the new inverted index.
     tableConfig = getOfflineTableConfig();
@@ -373,22 +365,23 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
     updateTableConfig(tableConfig);
 
     // Force to download a single segment, and disk usage should drop a bit.
-    SegmentZKMetadata segmentZKMetadata = _helixResourceManager.getSegmentsZKMetadata(offlineTableName).get(0);
+    SegmentZKMetadata segmentZKMetadata =
+        _helixResourceManager.getSegmentsZKMetadata(TableNameBuilder.OFFLINE.tableNameWithType(getTableName())).get(0);
     String segmentName = segmentZKMetadata.getSegmentName();
-    reloadOfflineSegment(offlineTableName, segmentName, true);
+    reloadOfflineSegment(getTableName(), segmentName, true);
     TestUtils.waitForCondition(aVoid -> {
       try {
         JsonNode queryResponse = postQuery(TEST_UPDATED_INVERTED_INDEX_QUERY);
         // Total docs should not change during reload
         assertEquals(queryResponse.get("totalDocs").asLong(), numTotalDocs);
-        return getTableSize(offlineTableName) < tableSizeAfterAddingIndexAgain;
+        return getTableSize(getTableName()) < tableSizeAfterAddingIndexAgain;
       } catch (Exception e) {
         throw new RuntimeException(e);
       }
     }, 600_000L, "Failed to clean up obsolete index in segment");
 
     // Force to download the whole table and expect disk usage drops further.
-    reloadOfflineTable(offlineTableName, true);
+    reloadOfflineTable(getTableName(), true);
     TestUtils.waitForCondition(aVoid -> {
       try {
         JsonNode queryResponse = postQuery(TEST_UPDATED_INVERTED_INDEX_QUERY);
@@ -400,10 +393,8 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
         throw new RuntimeException(e);
       }
     }, 600_000L, "Failed to cleanup obsolete index in table");
-    // Because other test cases can add new indices to the table, like testBloomFilterTriggering,
-    // so the size of table after forced download can actually get smaller than the size obtained
-    // at the beginning of this test.
-    assertTrue(getTableSize(offlineTableName) <= tableSizeWithDefaultIndex);
+    // With force download, the table size gets back to the initial value.
+    assertEquals(getTableSize(getTableName()), DISK_SIZE_IN_BYTES);
   }
 
   private void addInvertedIndex()
@@ -500,56 +491,89 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
     assertEquals(oneHourAgoValue, Long.parseLong(oneHourAgoColumnName));
   }
 
-  @Test
+  @Test(dependsOnMethods = "testBloomFilterTriggering")
   public void testRangeIndexTriggering()
       throws Exception {
     long numTotalDocs = getCountStarResult();
-
-    JsonNode queryResponse = postQuery(TEST_UPDATED_RANGE_INDEX_QUERY);
-    assertEquals(queryResponse.get("numEntriesScannedInFilter").asLong(), numTotalDocs);
+    assertEquals(postQuery(TEST_UPDATED_RANGE_INDEX_QUERY).get("numEntriesScannedInFilter").asLong(), numTotalDocs);
 
     // Update table config and trigger reload
     TableConfig tableConfig = getOfflineTableConfig();
     tableConfig.getIndexingConfig().setRangeIndexColumns(UPDATED_RANGE_INDEX_COLUMNS);
     updateTableConfig(tableConfig);
     reloadOfflineTable(getTableName());
-
     TestUtils.waitForCondition(aVoid -> {
       try {
-        JsonNode queryResponse1 = postQuery(TEST_UPDATED_RANGE_INDEX_QUERY);
+        JsonNode queryResponse = postQuery(TEST_UPDATED_RANGE_INDEX_QUERY);
         // Total docs should not change during reload
-        assertEquals(queryResponse1.get("totalDocs").asLong(), numTotalDocs);
-        return queryResponse1.get("numEntriesScannedInFilter").asLong() < numTotalDocs;
+        assertEquals(queryResponse.get("totalDocs").asLong(), numTotalDocs);
+        return queryResponse.get("numEntriesScannedInFilter").asLong() < numTotalDocs;
       } catch (Exception e) {
         throw new RuntimeException(e);
       }
     }, 600_000L, "Failed to generate range index");
+
+    // Update table config to remove the new range index, and
+    // reload table to clean the new range index physically.
+    tableConfig = getOfflineTableConfig();
+    tableConfig.getIndexingConfig().setRangeIndexColumns(getRangeIndexColumns());
+    updateTableConfig(tableConfig);
+    reloadOfflineTable(getTableName());
+    TestUtils.waitForCondition(aVoid -> {
+      try {
+        JsonNode queryResponse = postQuery(TEST_UPDATED_RANGE_INDEX_QUERY);
+        // Total docs should not change during reload, but num entries scanned
+        // gets back to total number of documents as the index is removed.
+        assertEquals(queryResponse.get("totalDocs").asLong(), numTotalDocs);
+        return queryResponse.get("numEntriesScannedInFilter").asLong() == numTotalDocs;
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }, 600_000L, "Failed to cleanup obsolete index");
+
+    assertEquals(getTableSize(getTableName()), _tableSizeAfterRemovingIndex);
   }
 
-  @Test
+  @Test(dependsOnMethods = "testDefaultColumns")
   public void testBloomFilterTriggering()
       throws Exception {
     long numTotalDocs = getCountStarResult();
-
-    JsonNode queryResponse = postQuery(TEST_UPDATED_BLOOM_FILTER_QUERY);
-    assertEquals(queryResponse.get("numSegmentsProcessed").asLong(), NUM_SEGMENTS);
+    assertEquals(postQuery(TEST_UPDATED_BLOOM_FILTER_QUERY).get("numSegmentsProcessed").asLong(), NUM_SEGMENTS);
 
     // Update table config and trigger reload
     TableConfig tableConfig = getOfflineTableConfig();
     tableConfig.getIndexingConfig().setBloomFilterColumns(UPDATED_BLOOM_FILTER_COLUMNS);
     updateTableConfig(tableConfig);
     reloadOfflineTable(getTableName());
-
     TestUtils.waitForCondition(aVoid -> {
       try {
-        JsonNode queryResponse1 = postQuery(TEST_UPDATED_BLOOM_FILTER_QUERY);
+        JsonNode queryResponse = postQuery(TEST_UPDATED_BLOOM_FILTER_QUERY);
         // Total docs should not change during reload
-        assertEquals(queryResponse1.get("totalDocs").asLong(), numTotalDocs);
-        return queryResponse1.get("numSegmentsProcessed").asLong() == 0L;
+        assertEquals(queryResponse.get("totalDocs").asLong(), numTotalDocs);
+        return queryResponse.get("numSegmentsProcessed").asLong() == 0L;
       } catch (Exception e) {
         throw new RuntimeException(e);
       }
     }, 600_000L, "Failed to generate bloom filter");
+
+    // Update table config to remove the new bloom filter, and
+    // reload table to clean the new bloom filter physically.
+    tableConfig = getOfflineTableConfig();
+    tableConfig.getIndexingConfig().setBloomFilterColumns(getBloomFilterColumns());
+    updateTableConfig(tableConfig);
+    reloadOfflineTable(getTableName());
+    TestUtils.waitForCondition(aVoid -> {
+      try {
+        JsonNode queryResponse = postQuery(TEST_UPDATED_BLOOM_FILTER_QUERY);
+        // Total docs should not change during reload, but num entries scanned
+        // gets back to total number of documents as bloom filter is removed.
+        assertEquals(queryResponse.get("totalDocs").asLong(), numTotalDocs);
+        return queryResponse.get("numSegmentsProcessed").asLong() == NUM_SEGMENTS;
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }, 600_000L, "Failed to cleanup obsolete index");
+    assertEquals(getTableSize(getTableName()), _tableSizeAfterRemovingIndex);
   }
 
   /** Check if server returns error response quickly without timing out Broker. */
@@ -578,6 +602,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
   public void testStarTreeTriggering()
       throws Exception {
     long numTotalDocs = getCountStarResult();
+    long tableSizeWithDefaultIndex = getTableSize(getTableName());
 
     // Test the first query
     JsonNode firstQueryResponse = postQuery(TEST_STAR_TREE_QUERY_1);
@@ -686,6 +711,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
         throw new RuntimeException(e);
       }
     }, 600_000L, "Failed to remove star-tree index");
+    assertEquals(getTableSize(getTableName()), tableSizeWithDefaultIndex);
 
     // First query should not be able to use the star-tree
     firstQueryResponse = postQuery(TEST_STAR_TREE_QUERY_1);
@@ -722,7 +748,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
    *   <li>"NewAddedDerivedSecondsSinceEpoch", DIMENSION, LONG, single-value, default (LONG.MIN_VALUE)</li>
    * </ul>
    */
-  @Test
+  @Test(dependsOnMethods = "testAggregateMetadataAPI")
   public void testDefaultColumns()
       throws Exception {
     long numTotalDocs = getCountStarResult();
@@ -743,6 +769,8 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
     queryResponse = postQuery(SELECT_STAR_QUERY);
     assertEquals(queryResponse.get("totalDocs").asLong(), numTotalDocs);
     assertEquals(queryResponse.get("selectionResults").get("columns").size(), 79);
+
+    _tableSizeAfterRemovingIndex = getTableSize(getTableName());
   }
 
   private void reloadWithExtraColumns()
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
index d1795f3..f118f8e 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
@@ -32,7 +32,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import org.apache.commons.configuration.PropertiesConfiguration;
-import org.apache.commons.io.FileUtils;
 import org.apache.pinot.common.utils.StringUtil;
 import org.apache.pinot.segment.local.function.FunctionEvaluator;
 import org.apache.pinot.segment.local.function.FunctionEvaluatorFactory;
@@ -187,13 +186,6 @@ public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler {
     _segmentProperties.setProperty(V1Constants.MetadataKeys.Segment.METRICS, metricColumns);
     _segmentProperties.setProperty(V1Constants.MetadataKeys.Segment.DATETIME_COLUMNS, dateTimeColumns);
 
-    // Create a back up for origin metadata.
-    File metadataFile = _segmentProperties.getFile();
-    File metadataBackUpFile = new File(metadataFile + ".bak");
-    if (!metadataBackUpFile.exists()) {
-      FileUtils.copyFile(metadataFile, metadataBackUpFile);
-    }
-
     // Save the new metadata.
     //
     // Commons Configuration 1.10 does not support file path containing '%'.
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
index d8d9100..86fa490 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
@@ -33,7 +33,6 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 import javax.annotation.Nullable;
-import org.apache.commons.lang.StringUtils;
 import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
 import org.apache.pinot.segment.spi.creator.name.FixedSegmentNameGenerator;
 import org.apache.pinot.segment.spi.creator.name.SegmentNameGenerator;
@@ -650,15 +649,15 @@ public class SegmentGeneratorConfig implements Serializable {
     _rawIndexCompressionType.putAll(rawIndexCompressionType);
   }
 
-  public String getMetrics() {
+  public List<String> getMetrics() {
     return getQualifyingFields(FieldType.METRIC, true);
   }
 
-  public String getDimensions() {
+  public List<String> getDimensions() {
     return getQualifyingFields(FieldType.DIMENSION, true);
   }
 
-  public String getDateTimeColumnNames() {
+  public List<String> getDateTimeColumnNames() {
     return getQualifyingFields(FieldType.DATE_TIME, true);
   }
 
@@ -673,9 +672,9 @@ public class SegmentGeneratorConfig implements Serializable {
   /**
    * Returns a comma separated list of qualifying field name strings
    * @param type FieldType to filter on
-   * @return Comma separate qualifying fields names.
+   * @return list of qualifying fields names.
    */
-  private String getQualifyingFields(FieldType type, boolean excludeVirtualColumns) {
+  private List<String> getQualifyingFields(FieldType type, boolean excludeVirtualColumns) {
     List<String> fields = new ArrayList<>();
 
     for (FieldSpec fieldSpec : getSchema().getAllFieldSpecs()) {
@@ -689,7 +688,7 @@ public class SegmentGeneratorConfig implements Serializable {
     }
 
     Collections.sort(fields);
-    return StringUtils.join(fields, ",");
+    return fields;
   }
 
   public boolean isNullHandlingEnabled() {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org