Posted to notifications@accumulo.apache.org by "keith-turner (via GitHub)" <gi...@apache.org> on 2023/05/30 17:18:22 UTC

[GitHub] [accumulo] keith-turner commented on a diff in pull request #3435: adds tests for file misnormalization

keith-turner commented on code in PR #3435:
URL: https://github.com/apache/accumulo/pull/3435#discussion_r1210586925


##########
test/src/main/java/org/apache/accumulo/test/functional/FileNormalizationIT.java:
##########
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.test.functional;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.admin.CompactionConfig;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.TableId;
+import org.apache.accumulo.core.dataImpl.KeyExtent;
+import org.apache.accumulo.core.metadata.MetadataTable;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
+import org.apache.accumulo.core.security.TablePermission;
+import org.apache.accumulo.harness.SharedMiniClusterBase;
+import org.apache.accumulo.test.TestIngest;
+import org.apache.accumulo.test.VerifyIngest;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * These tests check that Accumulo correctly handles misnormalized file paths in the metadata
+ * table. If Accumulo code reads a misnormalized file path from the metadata table, normalizes it,
+ * and then tries to update the metadata table, the updated key will not match the stored key. The
+ * mismatch could result in duplicate file entries.
+ */
+public class FileNormalizationIT extends SharedMiniClusterBase {
+
+  private static final Logger log = LoggerFactory.getLogger(FileNormalizationIT.class);
+
+  @BeforeAll
+  public static void setup() throws Exception {
+    SharedMiniClusterBase.startMiniCluster();
+  }
+
+  @AfterAll
+  public static void teardown() {
+    SharedMiniClusterBase.stopMiniCluster();
+  }
+
+  @Test
+  public void testSplits() throws Exception {
+    try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) {
+      String table = getUniqueNames(1)[0];
+
+      client.tableOperations().create(table);
+      VerifyIngest.VerifyParams params =
+          new VerifyIngest.VerifyParams(getClientProps(), table, 100_000);
+      TestIngest.ingest(client, params);
+
+      client.tableOperations().flush(table, null, null, true);
+
+      misnormalizeFiles(client, table);
+
+      var splits = TestIngest.getSplitPoints(params.startRow, params.startRow + params.rows, 2);
+      assertEquals(1, splits.size());
+
+      client.tableOperations().addSplits(table, splits);
+
+      HashSet<String> paths = new HashSet<>();
+
+      // if the verify call below is uncommented, the test will hang here because the tablet
+      // server dies with an uncaught exception in AssignmentHandler
+      //
+      // VerifyIngest.verifyIngest(client, params);
+
+      try (var scanner = createMetadataFileScanner(client, table)) {
+        scanner.forEach((k, v) -> {
+          var row = k.getRowData().toString();
+          var qual = k.getColumnQualifierData().toString();
+          var path = new Path(qual).toString();
+          var rowPath = row + "+" + path;
+
+          log.debug("split test, inspecting {} {} {}", row, qual, v);
+
+          assertFalse(paths.contains(rowPath),
+              "Tablet " + row + " has duplicate normalized path " + path);
+          paths.add(rowPath);
+        });
+      }
+
+      VerifyIngest.verifyIngest(client, params);
+    }
+  }
+
+  @Test
+  public void testCompaction() throws Exception {
+    try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) {
+      String table = getUniqueNames(1)[0];
+
+      client.tableOperations().create(table);
+      VerifyIngest.VerifyParams params =
+          new VerifyIngest.VerifyParams(getClientProps(), table, 100_000);
+      TestIngest.ingest(client, params);
+
+      client.tableOperations().flush(table, null, null, true);
+
+      misnormalizeFiles(client, table);
+
+      client.tableOperations().compact(table, new CompactionConfig().setWait(true));
+
+      try (var scanner = createMetadataFileScanner(client, table)) {
+        Set<String> filenames = new HashSet<>();
+
+        scanner.forEach((k, v) -> {
+          var path = new Path(k.getColumnQualifierData().toString());
+          assertFalse(filenames.contains(path.getName()));
+          assertTrue(path.getName().startsWith("A"));
+          filenames.add(path.getName());
+        });
+
+        assertEquals(1, filenames.size());
+
+        VerifyIngest.verifyIngest(client, params);
+      }
+    }
+  }
+
+  @Test
+  public void testMerge() throws Exception {
+    try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) {
+      String table = getUniqueNames(1)[0];
+
+      // disable compactions
+      Map<String,String> props = Map.of(Property.TABLE_MAJC_RATIO.getKey(), "10");
+      client.tableOperations().create(table, new NewTableConfiguration().setProperties(props));
+      VerifyIngest.VerifyParams params =
+          new VerifyIngest.VerifyParams(getClientProps(), table, 100_000);
+      TestIngest.ingest(client, params);
+
+      client.tableOperations().flush(table, null, null, true);
+
+      var splits = TestIngest.getSplitPoints(params.startRow, params.startRow + params.rows, 4);
+      assertEquals(3, splits.size());
+
+      client.tableOperations().addSplits(table, splits);
+
+      // create a unique file per tablet
+      client.tableOperations().compact(table, new CompactionConfig().setWait(true));
+
+      misnormalizeFiles(client, table);
+
+      Set<String> filesBeforeMerge = new HashSet<>();
+      try (var scanner = createMetadataFileScanner(client, table)) {
+        scanner.forEach((k, v) -> {
+          var qual = k.getColumnQualifierData().toString();
+          assertTrue(qual.contains("//tables//"));
+          filesBeforeMerge.add(qual);
+        });
+      }
+
+      // expect 4 files, one for each tablet
+      assertEquals(4, filesBeforeMerge.size());
+
+      client.tableOperations().merge(table, null, null);
+
+      Set<String> filesAfterMerge = new HashSet<>();
+      try (var scanner = createMetadataFileScanner(client, table)) {
+        scanner.forEach((k, v) -> {
+          // should only see the default tablet
+          assertTrue(k.getRow().toString().endsWith("<"));
+          filesAfterMerge.add(k.getColumnQualifierData().toString());
+        });
+      }
+
+      assertEquals(0, client.tableOperations().listSplits(table).size());
+      assertEquals(filesBeforeMerge, filesAfterMerge);
+
+      VerifyIngest.verifyIngest(client, params);
+    }
+  }
+
+  private Scanner createMetadataFileScanner(AccumuloClient client, String table) throws Exception {
+    var scanner = client.createScanner(MetadataTable.NAME);
+    var tableId = TableId.of(client.tableOperations().tableIdMap().get(table));
+    var range = new KeyExtent(tableId, null, null).toMetaRange();
+    scanner.setRange(range);
+    scanner.fetchColumnFamily(DataFileColumnFamily.NAME);
+    return scanner;
+  }
+
+  private Set<String> misnormalizeFiles(AccumuloClient client, String table) throws Exception {

Review Comment:
   I removed that. I was planning to do something, changed course, and then forgot to remove that.
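
To illustrate the mismatch described in the test's class javadoc, here is a minimal standalone sketch (not part of the PR; the host name, file path, and class name are hypothetical) showing how Hadoop's Path collapses the doubled slashes that the test injects, so the normalized string no longer matches the value stored in the metadata file column qualifier:

    import org.apache.hadoop.fs.Path;

    public class MisnormalizedPathSketch {
      public static void main(String[] args) {
        // a misnormalized file reference, with doubled slashes, as it might appear in the
        // metadata table's file column qualifier after misnormalizeFiles() rewrites it
        String stored = "hdfs://namenode:8020//tables//1//default_tablet//F0000070.rf";

        // Hadoop's Path normalizes the path portion, collapsing the duplicate slashes
        String normalized = new Path(stored).toString();
        // normalized is "hdfs://namenode:8020/tables/1/default_tablet/F0000070.rf"

        // the strings differ, so a metadata update keyed on the normalized form would add a
        // new column instead of replacing the stored one, leaving a duplicate file entry
        System.out.println(stored.equals(normalized)); // prints false
      }
    }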



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@accumulo.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org