You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by el...@apache.org on 2015/09/10 02:27:26 UTC

[2/3] accumulo git commit: ACCUMULO-3987 Utility for generating the nasty rfile

ACCUMULO-3987 Utility for generating the nasty rfile


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/10aba2a9
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/10aba2a9
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/10aba2a9

Branch: refs/heads/master
Commit: 10aba2a98e36246d68441ff8e8b1ece8205c4afe
Parents: 6df97b8
Author: Josh Elser <el...@apache.org>
Authored: Wed Sep 9 20:14:21 2015 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Wed Sep 9 20:14:21 2015 -0400

----------------------------------------------------------------------
 .../accumulo/test/GenerateSequentialRFile.java  | 82 ++++++++++++++++++++
 .../test/BulkImportSequentialRowsIT.java        | 36 ++++-----
 2 files changed, 96 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/10aba2a9/test/src/main/java/org/apache/accumulo/test/GenerateSequentialRFile.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/GenerateSequentialRFile.java b/test/src/main/java/org/apache/accumulo/test/GenerateSequentialRFile.java
new file mode 100644
index 0000000..927dff9
--- /dev/null
+++ b/test/src/main/java/org/apache/accumulo/test/GenerateSequentialRFile.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.test;
+
+import java.nio.charset.StandardCharsets;
+
+import org.apache.accumulo.core.cli.Help;
+import org.apache.accumulo.core.conf.DefaultConfiguration;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.file.FileOperations;
+import org.apache.accumulo.core.file.FileSKVWriter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
+
+import com.beust.jcommander.Parameter;
+
+/**
+ * Simple tool to reproduce the file that caused problems in ACCUMULO-3967.
+ *
+ * <p>
+ * Opens a writer on the local filesystem through {@link FileOperations} and appends {@code rows * valuesPerRow} entries to the default locality group. The
+ * entry for indices {@code (x, y)} has the row {@code String.format("%03d", x) + ":" + String.format("%05d", y)}, the column family {@code CF}, the column
+ * qualifier {@code CQ} and the UTF-8 bytes of the five-digit suffix as its value.
+ *
+ * <p>
+ * NOTE(review): the fixed zero-padding widths keep the generated rows in lexicographic order only while {@code rows <= 1000} and
+ * {@code valuesPerRow <= 100000}; confirm how the writer reacts to out-of-order keys before using larger values.
+ */
+public class GenerateSequentialRFile implements Runnable {
+  private static final Text CF = new Text("CF");
+  private static final Text CQ = new Text("CQ");
+
+  private final Opts opts;
+
+  /**
+   * @param opts
+   *          parsed command-line options naming the output file and the amount of data to write
+   */
+  public GenerateSequentialRFile(Opts opts) {
+    this.opts = opts;
+  }
+
+  /**
+   * Command-line options understood by this tool.
+   */
+  static class Opts extends Help {
+    // Required: without a path there is nothing to create, so fail at argument parsing rather than handing a null path to the writer
+    @Parameter(names = {"-f", "--file"}, required = true, description = "Path to the file to create")
+    String filePath;
+    // Upper bound (exclusive) of the three-digit row prefix
+    @Parameter(names = {"-nr"}, description = "Number of rows")
+    long rows = 24;
+    // Upper bound (exclusive) of the five-digit suffix written under each row prefix
+    @Parameter(names = {"-nv"}, description = "Number of values per row")
+    long valuesPerRow = 42000;
+  }
+
+  /**
+   * Writes the sequential entries to {@code opts.filePath} on the local filesystem.
+   *
+   * @throws RuntimeException
+   *           wrapping any exception raised while opening, writing or closing the file
+   */
+  @Override
+  public void run() {
+    try {
+      final Configuration conf = new Configuration();
+      final FileSystem fs = FileSystem.getLocal(conf);
+      final FileSKVWriter writer = FileOperations.getInstance().openWriter(opts.filePath, fs, conf, DefaultConfiguration.getInstance());
+
+      // Explicit try/finally so the writer is closed even when an append fails part-way through
+      try {
+        writer.startDefaultLocalityGroup();
+
+        // long counters: the limits are long options, and an int counter would wrap around before ever reaching a limit above Integer.MAX_VALUE
+        for (long x = 0; x < opts.rows; x++) {
+          final Text row = new Text(String.format("%03d", x));
+          for (long y = 0; y < opts.valuesPerRow; y++) {
+            final String suffix = String.format("%05d", y);
+            writer.append(new Key(new Text(row + ":" + suffix), CF, CQ), new Value(suffix.getBytes(StandardCharsets.UTF_8)));
+          }
+        }
+      } finally {
+        writer.close();
+      }
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Command-line entry point: parses {@code args} into {@link Opts} and generates the file.
+   *
+   * @param args
+   *          command-line arguments, see {@link Opts}
+   */
+  public static void main(String[] args) throws Exception {
+    Opts opts = new Opts();
+    opts.parseArgs(GenerateSequentialRFile.class.getName(), args);
+    new GenerateSequentialRFile(opts).run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/10aba2a9/test/src/test/java/org/apache/accumulo/test/BulkImportSequentialRowsIT.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/accumulo/test/BulkImportSequentialRowsIT.java b/test/src/test/java/org/apache/accumulo/test/BulkImportSequentialRowsIT.java
index 3fee282..a1d6f4b 100644
--- a/test/src/test/java/org/apache/accumulo/test/BulkImportSequentialRowsIT.java
+++ b/test/src/test/java/org/apache/accumulo/test/BulkImportSequentialRowsIT.java
@@ -19,15 +19,9 @@ package org.apache.accumulo.test;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
-import java.util.SortedSet;
 import java.util.TreeSet;
 
 import org.apache.accumulo.core.client.admin.TableOperations;
-import org.apache.accumulo.core.conf.DefaultConfiguration;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.file.FileOperations;
-import org.apache.accumulo.core.file.FileSKVWriter;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.harness.AccumuloClusterIT;
 import org.apache.accumulo.minicluster.impl.MiniAccumuloConfigImpl;
@@ -46,6 +40,9 @@ import com.google.common.collect.Iterables;
 public class BulkImportSequentialRowsIT extends AccumuloClusterIT {
   private static final Logger log = LoggerFactory.getLogger(BulkImportSequentialRowsIT.class);
 
+  private static final long NR = 24;
+  private static final long NV = 42000;
+
   @Override
   public int defaultTimeoutSeconds() {
     return 60;
@@ -81,32 +78,27 @@ public class BulkImportSequentialRowsIT extends AccumuloClusterIT {
     assertTrue(fs.mkdirs(err));
 
     Path rfile = new Path(bulk, "file.rf");
-    FileSKVWriter writer = FileOperations.getInstance().openWriter(rfile.toString(), fs, new Configuration(), DefaultConfiguration.getInstance());
-    writer.startDefaultLocalityGroup();
-
-    final Value emptyValue = new Value(new byte[0]);
-    final SortedSet<Text> splits = new TreeSet<Text>();
-    for (int i = 0; i < 100; i++) {
-      String row = String.format("%03d", i);
-      splits.add(new Text(row));
-      writer.append(new Key(row, "", ""), emptyValue);
-      for (int j = 0; j < 100; j++) {
-        writer.append(new Key(row, "", String.format("%03d", j)), emptyValue);
-      }
-    }
-    writer.close();
+
+    GenerateSequentialRFile.main(new String[] {"-f", rfile.toString(), "-nr", Long.toString(NR), "-nv", Long.toString(NV)});
 
     assertTrue(fs.exists(rfile));
 
     // Add some splits
-    to.addSplits(tableName, splits);
+    to.addSplits(tableName, getSplits());
 
     // Then import a single rfile to all the tablets, hoping that we get a failure to import because of the balancer moving tablets around
     // and then we get to verify that the bug is actually fixed.
     to.importDirectory(tableName, bulk.toString(), err.toString(), false);
 
     // The bug is that some tablets don't get imported into.
-    assertEquals(10100, Iterables.size(getConnector().createScanner(tableName, Authorizations.EMPTY)));
+    assertEquals(NR * NV, Iterables.size(getConnector().createScanner(tableName, Authorizations.EMPTY)));
   }
 
+  private TreeSet<Text> getSplits() {
+    TreeSet<Text> splits = new TreeSet<>();
+    for (int i = 0; i < NR; i++) {
+      splits.add(new Text(String.format("%03d", i)));
+    }
+    return splits;
+  }
 }