Posted to commits@accumulo.apache.org by kt...@apache.org on 2019/04/26 20:22:57 UTC

[accumulo-examples] branch master updated: Fix #42 Update compactionStrategy example (#49)

This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo-examples.git


The following commit(s) were added to refs/heads/master by this push:
     new 983c371  Fix #42 Update compactionStrategy example (#49)
983c371 is described below

commit 983c3715b3d69377a96550623fa1ca99a25af970
Author: elinaawise <47...@users.noreply.github.com>
AuthorDate: Fri Apr 26 16:22:53 2019 -0400

    Fix #42 Update compactionStrategy example (#49)
---
 docs/compactionStrategy.md                         | 69 +++++++++++++---------
 .../examples/client/RandomBatchScanner.java        |  2 +-
 .../examples/client/SequentialBatchWriter.java     | 44 ++++++++------
 3 files changed, 69 insertions(+), 46 deletions(-)

diff --git a/docs/compactionStrategy.md b/docs/compactionStrategy.md
index 6b5bebc..17d0c6a 100644
--- a/docs/compactionStrategy.md
+++ b/docs/compactionStrategy.md
@@ -21,45 +21,58 @@ This tutorial uses the following Java classes, which can be found in org.apache.
  * DefaultCompactionStrategy.java - determines which files to compact based on table.compaction.major.ratio and table.file.max
  * EverythingCompactionStrategy.java - compacts all files
  * SizeLimitCompactionStrategy.java - compacts files no bigger than table.majc.compaction.strategy.opts.sizeLimit
- * TwoTierCompactionStrategy.java - uses default compression for smaller files and table.majc.compaction.strategy.opts.file.large.compress.type for larger files
+ * BasicCompactionStrategy.java - filters out input files larger than table.majc.compaction.strategy.opts.filter.size,
+                                  and compresses files larger than table.majc.compaction.strategy.opts.large.compress.threshold
+                                  using table.majc.compaction.strategy.opts.large.compress.type
+
 
 This is an example of how to configure a compaction strategy. By default, Accumulo will always use the DefaultCompactionStrategy, unless
 these steps are taken to change the configuration. Use the strategy and settings that best fit your Accumulo setup. This example shows
-how to configure and test one of the more complicated strategies, the TwoTierCompactionStrategy. Note that this example requires hadoop
+how to configure and test one of the more complicated strategies, the BasicCompactionStrategy. Note that this example requires Hadoop
 native libraries built with snappy in order to use snappy compression.
 
-To begin, run the command to create a table for testing:
-
-    $ accumulo shell -u root -p secret -e "createtable test1"
-
-The command below sets the compression for smaller files and minor compactions for that table.
-
-    $ accumulo shell -u root -p secret -e "config -s table.file.compress.type=snappy -t test1"
-
-The commands below will configure the TwoTierCompactionStrategy to use gz compression for files larger than 1M. 
-
-    $ accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy.opts.file.large.compress.threshold=1M -t test1"
-    $ accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy.opts.file.large.compress.type=gz -t test1"
-    $ accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy=org.apache.accumulo.tserver.compaction.TwoTierCompactionStrategy -t test1"
+To begin, run the following command to create a table for testing:
+
+```bash
+$ accumulo shell -u <username> -p <password> -e "createtable test1"
+```
+
+The commands below will configure the BasicCompactionStrategy to:
+ - Avoid compacting files over 250M
+ - Compact files over 100M using gz
+ - Compact files less than 100M using snappy
+ 
+```bash
+$ accumulo shell -u <username> -p <password> -e "config -t test1 -s table.file.compress.type=snappy"
+$ accumulo shell -u <username> -p <password> -e "config -t test1 -s table.majc.compaction.strategy=org.apache.accumulo.tserver.compaction.strategies.BasicCompactionStrategy"
+$ accumulo shell -u <username> -p <password> -e "config -t test1 -s table.majc.compaction.strategy.opts.filter.size=250M"
+$ accumulo shell -u <username> -p <password> -e "config -t test1 -s table.majc.compaction.strategy.opts.large.compress.threshold=100M"
+$ accumulo shell -u <username> -p <password> -e "config -t test1 -s table.majc.compaction.strategy.opts.large.compress.type=gz"
+```
 
 Generate some data and files in order to test the strategy:
 
-    $ ./bin/runex client.SequentialBatchWriter -t test1 --start 0 --num 10000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20
-    $ accumulo shell -u root -p secret -e "flush -t test1"
-    $ ./bin/runex client.SequentialBatchWriter -t test1 --start 0 --num 11000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20
-    $ accumulo shell -u root -p secret -e "flush -t test1"
-    $ ./bin/runex client.SequentialBatchWriter -t test1 --start 0 --num 12000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20
-    $ accumulo shell -u root -p secret -e "flush -t test1"
-    $ ./bin/runex client.SequentialBatchWriter -t test1 --start 0 --num 13000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20
-    $ accumulo shell -u root -p secret -e "flush -t test1"
+```bash
+$ ./bin/runex client.SequentialBatchWriter -t test1 --start 0 --num 10000 --size 50
+$ accumulo shell -u <username> -p <password> -e "flush -t test1"
+$ ./bin/runex client.SequentialBatchWriter -t test1 --start 0 --num 11000 --size 50
+$ accumulo shell -u <username> -p <password> -e "flush -t test1"
+$ ./bin/runex client.SequentialBatchWriter -t test1 --start 0 --num 12000 --size 50
+$ accumulo shell -u <username> -p <password> -e "flush -t test1"
+$ ./bin/runex client.SequentialBatchWriter -t test1 --start 0 --num 13000 --size 50
+$ accumulo shell -u <username> -p <password> -e "flush -t test1"
+```
 
 View the tserver log in <accumulo_home>/logs to find the compaction and the name of the <rfile> that was compacted for your table. Print info about this file using the rfile-info tool:
 
-    $ accumulo rfile-info <rfile>
-
+```bash
+$ accumulo rfile-info <rfile>
+```
 Details about the rfile will be printed, and the compression type should match the type used in the compaction:
+
+```
 Meta block     : RFile.index
-      Raw size             : 512 bytes
-      Compressed size      : 278 bytes
+      Raw size             : 319 bytes
+      Compressed size      : 180 bytes
       Compression type     : gz
-
+```
\ No newline at end of file
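
The five shell commands above can also be expressed through the Java client API. The sketch below is illustrative only and not part of this commit: it assumes a table named test1 already exists and that a valid accumulo-client.properties path is supplied; the class name ConfigureBasicCompaction is hypothetical. The property names match those used in the doc above.

```java
import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;

// Hypothetical helper (not part of this commit): applies the same table
// properties as the shell commands in docs/compactionStrategy.md.
public class ConfigureBasicCompaction {
  public static void main(String[] args) throws Exception {
    try (AccumuloClient client = Accumulo.newClient()
        .from("/path/to/accumulo-client.properties").build()) {
      String table = "test1";
      client.tableOperations().setProperty(table, "table.file.compress.type", "snappy");
      client.tableOperations().setProperty(table, "table.majc.compaction.strategy",
          "org.apache.accumulo.tserver.compaction.strategies.BasicCompactionStrategy");
      client.tableOperations().setProperty(table,
          "table.majc.compaction.strategy.opts.filter.size", "250M");
      client.tableOperations().setProperty(table,
          "table.majc.compaction.strategy.opts.large.compress.threshold", "100M");
      client.tableOperations().setProperty(table,
          "table.majc.compaction.strategy.opts.large.compress.type", "gz");
    }
  }
}
```
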
diff --git a/src/main/java/org/apache/accumulo/examples/client/RandomBatchScanner.java b/src/main/java/org/apache/accumulo/examples/client/RandomBatchScanner.java
index 5ecafc3..df955ae 100644
--- a/src/main/java/org/apache/accumulo/examples/client/RandomBatchScanner.java
+++ b/src/main/java/org/apache/accumulo/examples/client/RandomBatchScanner.java
@@ -86,7 +86,7 @@ public class RandomBatchScanner {
           String row = key.getRow().toString();
           long rowId = Integer.parseInt(row.split("_")[1]);
 
-          Value expectedValue = SequentialBatchWriter.createValue(rowId);
+          Value expectedValue = SequentialBatchWriter.createValue(rowId, 50);
 
           if (!Arrays.equals(expectedValue.get(), value.get())) {
             log.error("Unexpected value for key: {} expected: {} actual: {}", key,
diff --git a/src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java b/src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java
index 8fa1c99..79817ca 100644
--- a/src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java
+++ b/src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java
@@ -16,21 +16,16 @@
  */
 package org.apache.accumulo.examples.client;
 
-import java.util.Random;
-
-import org.apache.accumulo.core.client.Accumulo;
-import org.apache.accumulo.core.client.AccumuloClient;
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.TableExistsException;
-import org.apache.accumulo.core.client.TableNotFoundException;
+import com.beust.jcommander.Parameter;
+import org.apache.accumulo.core.client.*;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.examples.cli.ClientOpts;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.Random;
+
 /**
  * Simple example for writing random data in sequential order to Accumulo.
  */
@@ -38,9 +33,9 @@ public class SequentialBatchWriter {
 
   private static final Logger log = LoggerFactory.getLogger(SequentialBatchWriter.class);
 
-  public static Value createValue(long rowId) {
+  public static Value createValue(long rowId, int size) {
     Random r = new Random(rowId);
-    byte value[] = new byte[50];
+    byte[] value = new byte[size];
 
     r.nextBytes(value);
 
@@ -52,6 +47,20 @@ public class SequentialBatchWriter {
     return new Value(value);
   }
 
+  static class Opts extends ClientOpts {
+    @Parameter(names = {"-t"}, description = "table to use")
+    public String tableName = "batch";
+
+    @Parameter(names = {"--start"}, description = "starting row")
+    public Integer start = 0;
+
+    @Parameter(names = {"--num"}, description = "number of rows")
+    public Integer num = 10_000;
+
+    @Parameter(names = {"--size"}, description = "size of values")
+    public Integer size = 50;
+  }
+
   /**
    * Writes a configurable number of entries to Accumulo using a {@link BatchWriter}. The rows of the entries will be
    * sequential starting from the configured start row. The column families will be "foo" and column qualifiers will be
@@ -59,21 +68,22 @@ public class SequentialBatchWriter {
    */
   public static void main(String[] args)
       throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
-    ClientOpts opts = new ClientOpts();
+    Opts opts = new Opts();
     opts.parseArgs(SequentialBatchWriter.class.getName(), args);
 
     try (AccumuloClient client = Accumulo.newClient().from(opts.getClientPropsPath()).build()) {
       try {
-        client.tableOperations().create("batch");
+        client.tableOperations().create(opts.tableName);
       } catch (TableExistsException e) {
         // ignore
       }
 
-      try (BatchWriter bw = client.createBatchWriter("batch")) {
-        for (int i = 0; i < 10000; i++) {
-          Mutation m = new Mutation(String.format("row_%010d", i));
+      try (BatchWriter bw = client.createBatchWriter(opts.tableName)) {
+        for (int i = 0; i < opts.num; i++) {
+          int row = i + opts.start;
+          Mutation m = new Mutation(String.format("row_%010d", row));
           // create a random value that is a function of row id for verification purposes
-          m.put("foo", "1", createValue(i));
+          m.put("foo", "1", createValue(row, opts.size));
           bw.addMutation(m);
           if (i % 1000 == 0) {
             log.trace("wrote {} entries", i);