You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@accumulo.apache.org by GitBox <gi...@apache.org> on 2021/11/04 20:04:10 UTC

[GitHub] [accumulo] Manno15 commented on a change in pull request #2347: New IteratorMincClassCastBugIT and fixes to #1411

Manno15 commented on a change in pull request #2347:
URL: https://github.com/apache/accumulo/pull/2347#discussion_r743164169



##########
File path: test/src/main/java/org/apache/accumulo/test/functional/IteratorMincClassCastBugIT.java
##########
@@ -0,0 +1,516 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.test.functional;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.ArrayByteSequence;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.PartialKey;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.IteratorUtil;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.accumulo.harness.AccumuloClusterHarness;
+import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
+
+/**
+ * Tests iterator class hierarchy bug. See https://github.com/apache/accumulo/issues/2341
+ */
+public class IteratorMincClassCastBugIT extends AccumuloClusterHarness {
+
+  @Override
+  public void configureMiniCluster(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
+    // this bug only shows up when not using native maps
+    cfg.setProperty(Property.TSERV_NATIVEMAP_ENABLED, "false");
+    cfg.setNumTservers(1);
+  }
+
+  @Override
+  protected int defaultTimeoutSeconds() {
+    return 60;
+  }
+
+  @Test
+  public void test() throws Exception {
+    try (AccumuloClient c = Accumulo.newClient().from(getClientProps()).build()) {
+
+      String tableName = getUniqueNames(1)[0];
+
+      NewTableConfiguration ntc = new NewTableConfiguration();
+      Map<String,Set<Text>> groups = new HashMap<>();
+      groups.put("g1", Set.of(new Text("~chunk")));
+      groups.put("g2", Set.of(new Text("refs")));
+      ntc.setLocalityGroups(groups);
+
+      IteratorSetting iteratorSetting = new IteratorSetting(20, ChunkCombiner.class);
+      ntc.attachIterator(iteratorSetting, EnumSet.of(IteratorUtil.IteratorScope.minc));
+
+      c.tableOperations().create(tableName, ntc);
+
+      int chunkSize = 64 * 1024;
+      ColumnVisibility visibility = new ColumnVisibility();
+
+      List<URL> files = new ArrayList<>();
+      files.add(getClass().getClassLoader().getResource("testfile1.md"));
+
+      try (BatchWriter bw = c.createBatchWriter(tableName, new BatchWriterConfig())) {
+        FileDataIngest fdi = new FileDataIngest(chunkSize, visibility);
+        for (URL filename : files) {
+          fdi.insertFileData(filename, bw);
+        }
+      }
+
+      c.tableOperations().flush(tableName, null, null, true);
+    }
+  }
+
+  /**
+   * Copied from 2.0 examples. Takes a list of files and archives them into Accumulo keyed on hashes
+   * of the files.
+   */
+  @SuppressFBWarnings(value = "WEAK_MESSAGE_DIGEST_MD5", justification = "For testing only")
+  public static class FileDataIngest {
+    public static final Text CHUNK_CF = new Text("~chunk");
+    public static final Text REFS_CF = new Text("refs");
+    public static final String REFS_ORIG_FILE = "name";
+    public static final String REFS_FILE_EXT = "filext";
+    public static final ByteSequence CHUNK_CF_BS =
+        new ArrayByteSequence(CHUNK_CF.getBytes(), 0, CHUNK_CF.getLength());
+    public static final ByteSequence REFS_CF_BS =
+        new ArrayByteSequence(REFS_CF.getBytes(), 0, REFS_CF.getLength());
+
+    int chunkSize;
+    byte[] chunkSizeBytes;
+    byte[] buf;
+    MessageDigest md5digest;
+    ColumnVisibility cv;
+
+    public FileDataIngest(int chunkSize, ColumnVisibility colvis) {
+      this.chunkSize = chunkSize;
+      chunkSizeBytes = intToBytes(chunkSize);
+      buf = new byte[chunkSize];
+      try {
+        md5digest = MessageDigest.getInstance("MD5");
+      } catch (NoSuchAlgorithmException e) {
+        throw new RuntimeException(e);
+      }
+      cv = colvis;
+    }
+
+    public String insertFileData(URL fileURL, BatchWriter bw)
+        throws MutationsRejectedException, IOException {
+      String filename = fileURL.getFile();
+      if (chunkSize == 0)
+        return "";
+      md5digest.reset();
+      String uid = hexString(md5digest.digest(filename.getBytes()));
+
+      // read through file once, calculating hashes
+      md5digest.reset();
+      InputStream fis = null;
+      int numRead = 0;
+      // try (var reader = new InputStreamReader(accumuloPropsLocation.openStream(), UTF_8)) {

Review comment:
   ```suggestion
   
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@accumulo.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org