Posted to commits@accumulo.apache.org by kt...@apache.org on 2015/09/21 15:51:26 UTC

[1/7] accumulo git commit: ACCUMULO-3913 Added per table sampling

Repository: accumulo
Updated Branches:
  refs/heads/master fdcc1698c -> 45f18c174


http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/shell/src/main/java/org/apache/accumulo/shell/commands/CompactCommand.java
----------------------------------------------------------------------
diff --git a/shell/src/main/java/org/apache/accumulo/shell/commands/CompactCommand.java b/shell/src/main/java/org/apache/accumulo/shell/commands/CompactCommand.java
index f183b25..c8b0e11 100644
--- a/shell/src/main/java/org/apache/accumulo/shell/commands/CompactCommand.java
+++ b/shell/src/main/java/org/apache/accumulo/shell/commands/CompactCommand.java
@@ -38,7 +38,7 @@ public class CompactCommand extends TableOperation {
 
   // file selection and file output options
   private Option enameOption, epathOption, sizeLtOption, sizeGtOption, minFilesOption, outBlockSizeOpt, outHdfsBlockSizeOpt, outIndexBlockSizeOpt,
-      outCompressionOpt, outReplication;
+      outCompressionOpt, outReplication, enoSampleOption;
 
   private CompactionConfig compactionConfig = null;
 
@@ -89,6 +89,7 @@ public class CompactCommand extends TableOperation {
   private Map<String,String> getConfigurableCompactionStrategyOpts(CommandLine cl) {
     Map<String,String> opts = new HashMap<>();
 
+    put(cl, opts, enoSampleOption, CompactionSettings.SF_NO_SAMPLE);
     put(cl, opts, enameOption, CompactionSettings.SF_NAME_RE_OPT);
     put(cl, opts, epathOption, CompactionSettings.SF_PATH_RE_OPT);
     put(cl, opts, sizeLtOption, CompactionSettings.SF_LT_ESIZE_OPT);
@@ -190,6 +191,9 @@ public class CompactCommand extends TableOperation {
     cancelOpt = new Option(null, "cancel", false, "cancel user initiated compactions");
     opts.addOption(cancelOpt);
 
+    enoSampleOption = new Option(null, "sf-no-sample", false,
+        "Select files that have no sample data or sample data that differes from the table configuration.");
+    opts.addOption(enoSampleOption);
     enameOption = newLAO("sf-ename", "Select files using regular expression to match file names. Only matches against last part of path.");
     opts.addOption(enameOption);
     epathOption = newLAO("sf-epath", "Select files using regular expression to match file paths to compact. Matches against full path.");

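For context, the new "--sf-no-sample" selector feeds the configurable compaction strategy the same way the other sf-* options do. A minimal shell sketch (table name assumed) that rewrites only the files whose sample data is missing or stale:

    compact -t mytable -w --sf-no-sample
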
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/shell/src/main/java/org/apache/accumulo/shell/commands/GrepCommand.java
----------------------------------------------------------------------
diff --git a/shell/src/main/java/org/apache/accumulo/shell/commands/GrepCommand.java b/shell/src/main/java/org/apache/accumulo/shell/commands/GrepCommand.java
index 97bddc9..44ee93c 100644
--- a/shell/src/main/java/org/apache/accumulo/shell/commands/GrepCommand.java
+++ b/shell/src/main/java/org/apache/accumulo/shell/commands/GrepCommand.java
@@ -61,6 +61,8 @@ public class GrepCommand extends ScanCommand {
 
     scanner.setTimeout(getTimeout(cl), TimeUnit.MILLISECONDS);
 
+    setupSampling(tableName, cl, shellState, scanner);
+
     for (int i = 0; i < cl.getArgs().length; i++) {
       setUpIterator(Integer.MAX_VALUE - cl.getArgs().length + i, "grep" + i, cl.getArgs()[i], scanner, cl);
     }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/shell/src/main/java/org/apache/accumulo/shell/commands/ScanCommand.java
----------------------------------------------------------------------
diff --git a/shell/src/main/java/org/apache/accumulo/shell/commands/ScanCommand.java b/shell/src/main/java/org/apache/accumulo/shell/commands/ScanCommand.java
index 3531fe9..595829b 100644
--- a/shell/src/main/java/org/apache/accumulo/shell/commands/ScanCommand.java
+++ b/shell/src/main/java/org/apache/accumulo/shell/commands/ScanCommand.java
@@ -26,9 +26,11 @@ import java.util.concurrent.TimeUnit;
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
 import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.Scanner;
 import org.apache.accumulo.core.client.ScannerBase;
 import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
@@ -60,6 +62,19 @@ public class ScanCommand extends Command {
   private Option optEndRowExclusive;
   private Option timeoutOption;
   private Option profileOpt;
+  private Option sampleOpt;
+
+  protected void setupSampling(final String tableName, final CommandLine cl, final Shell shellState, ScannerBase scanner) throws TableNotFoundException,
+      AccumuloException, AccumuloSecurityException {
+    if (getUseSample(cl)) {
+      SamplerConfiguration samplerConfig = shellState.getConnector().tableOperations().getSamplerConfiguration(tableName);
+      if (samplerConfig == null) {
+        throw new SampleNotPresentException("Table " + tableName + " does not have sampling configured");
+      }
+      Shell.log.debug("Using sampling configuration : " + samplerConfig);
+      scanner.setSamplerConfiguration(samplerConfig);
+    }
+  }
 
   @Override
   public int execute(final String fullCommand, final CommandLine cl, final Shell shellState) throws Exception {
@@ -86,6 +101,8 @@ public class ScanCommand extends Command {
     // set timeout
     scanner.setTimeout(getTimeout(cl), TimeUnit.MILLISECONDS);
 
+    setupSampling(tableName, cl, shellState, scanner);
+
     // output the records
     if (cl.hasOption(showFewOpt.getOpt())) {
       final String showLength = cl.getOptionValue(showFewOpt.getOpt());
@@ -112,6 +129,10 @@ public class ScanCommand extends Command {
     return 0;
   }
 
+  protected boolean getUseSample(CommandLine cl) {
+    return cl.hasOption(sampleOpt.getLongOpt());
+  }
+
   protected long getTimeout(final CommandLine cl) {
     if (cl.hasOption(timeoutOption.getLongOpt())) {
       return AccumuloConfiguration.getTimeInMillis(cl.getOptionValue(timeoutOption.getLongOpt()));
@@ -294,6 +315,7 @@ public class ScanCommand extends Command {
     timeoutOption = new Option(null, "timeout", true,
         "time before scan should fail if no data is returned. If no unit is given assumes seconds.  Units d,h,m,s,and ms are supported.  e.g. 30s or 100ms");
     outputFileOpt = new Option("o", "output", true, "local file to write the scan output to");
+    sampleOpt = new Option(null, "sample", false, "Show the table's sample data instead of all data");
 
     scanOptAuths.setArgName("comma-separated-authorizations");
     scanOptRow.setArgName("row");
@@ -324,6 +346,7 @@ public class ScanCommand extends Command {
     o.addOption(timeoutOption);
     o.addOption(outputFileOpt);
     o.addOption(profileOpt);
+    o.addOption(sampleOpt);
 
     return o;
   }

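For reference, a minimal client-side sketch of what setupSampling does, using only the public API touched above (connector acquisition and table name are assumed):

    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.core.client.SampleNotPresentException;
    import org.apache.accumulo.core.client.Scanner;
    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.security.Authorizations;

    // open a scanner that returns only the table's sample data
    static Scanner openSampleScanner(Connector conn, String table) throws Exception {
      SamplerConfiguration sc = conn.tableOperations().getSamplerConfiguration(table);
      if (sc == null) {
        // same failure mode the shell reports when sampling is not configured
        throw new SampleNotPresentException("Table " + table + " does not have sampling configured");
      }
      Scanner scanner = conn.createScanner(table, Authorizations.EMPTY);
      scanner.setSamplerConfiguration(sc);
      return scanner;
    }
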
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/start/.gitignore
----------------------------------------------------------------------
diff --git a/start/.gitignore b/start/.gitignore
index 56204d2..e7d7fb1 100644
--- a/start/.gitignore
+++ b/start/.gitignore
@@ -23,3 +23,4 @@
 /.pydevproject
 /.idea
 /*.iml
+/target/

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/test/src/main/java/org/apache/accumulo/test/InMemoryMapMemoryUsageTest.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/InMemoryMapMemoryUsageTest.java b/test/src/main/java/org/apache/accumulo/test/InMemoryMapMemoryUsageTest.java
index fb0050f..05b405e 100644
--- a/test/src/main/java/org/apache/accumulo/test/InMemoryMapMemoryUsageTest.java
+++ b/test/src/main/java/org/apache/accumulo/test/InMemoryMapMemoryUsageTest.java
@@ -18,9 +18,11 @@ package org.apache.accumulo.test;
 
 import java.util.Collections;
 
+import org.apache.accumulo.core.conf.DefaultConfiguration;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
 import org.apache.accumulo.tserver.InMemoryMap;
 import org.apache.hadoop.io.Text;
 
@@ -51,7 +53,11 @@ class InMemoryMapMemoryUsageTest extends MemoryUsageTest {
 
   @Override
   void init() {
-    imm = new InMemoryMap(false, "/tmp");
+    try {
+      imm = new InMemoryMap(DefaultConfiguration.getInstance());
+    } catch (LocalityGroupConfigurationError e) {
+      throw new RuntimeException(e);
+    }
     key = new Text();
 
     colf = new Text(String.format("%0" + colFamLen + "d", 0));

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/test/src/main/java/org/apache/accumulo/test/SampleIT.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/SampleIT.java b/test/src/main/java/org/apache/accumulo/test/SampleIT.java
new file mode 100644
index 0000000..423b955
--- /dev/null
+++ b/test/src/main/java/org/apache/accumulo/test/SampleIT.java
@@ -0,0 +1,497 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchScanner;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.ClientSideIteratorScanner;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IsolatedScanner;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.SampleNotPresentException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.ScannerBase;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.CompactionConfig;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.client.impl.Credentials;
+import org.apache.accumulo.core.client.impl.OfflineScanner;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.iterators.WrappingIterator;
+import org.apache.accumulo.core.sample.RowSampler;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.harness.AccumuloClusterHarness;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
+
+public class SampleIT extends AccumuloClusterHarness {
+
+  private static final Map<String,String> OPTIONS_1 = ImmutableMap.of("hasher", "murmur3_32", "modulus", "1009");
+  private static final Map<String,String> OPTIONS_2 = ImmutableMap.of("hasher", "murmur3_32", "modulus", "997");
+
+  private static final SamplerConfiguration SC1 = new SamplerConfiguration(RowSampler.class.getName()).setOptions(OPTIONS_1);
+  private static final SamplerConfiguration SC2 = new SamplerConfiguration(RowSampler.class.getName()).setOptions(OPTIONS_2);
+
+  public static class IteratorThatUsesSample extends WrappingIterator {
+    private SortedKeyValueIterator<Key,Value> sampleDC;
+    private boolean hasTop;
+
+    @Override
+    public boolean hasTop() {
+      return hasTop && super.hasTop();
+    }
+
+    @Override
+    public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
+
+      int sampleCount = 0;
+      sampleDC.seek(range, columnFamilies, inclusive);
+
+      while (sampleDC.hasTop()) {
+        sampleCount++;
+        sampleDC.next();
+      }
+
+      if (sampleCount < 10) {
+        hasTop = true;
+        super.seek(range, columnFamilies, inclusive);
+      } else {
+        // too many sample entries; skip this range entirely
+        hasTop = false;
+      }
+    }
+
+    @Override
+    public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+      super.init(source, options, env);
+
+      IteratorEnvironment sampleEnv = env.cloneWithSamplingEnabled();
+
+      sampleDC = source.deepCopy(sampleEnv);
+    }
+  }
+
+  @Test
+  public void testBasic() throws Exception {
+
+    Connector conn = getConnector();
+    String tableName = getUniqueNames(1)[0];
+    String clone = tableName + "_clone";
+
+    conn.tableOperations().create(tableName, new NewTableConfiguration().enableSampling(SC1));
+
+    BatchWriter bw = conn.createBatchWriter(tableName, new BatchWriterConfig());
+
+    TreeMap<Key,Value> expected = new TreeMap<Key,Value>();
+    String someRow = writeData(bw, SC1, expected);
+
+    Scanner scanner = conn.createScanner(tableName, Authorizations.EMPTY);
+    Scanner isoScanner = new IsolatedScanner(conn.createScanner(tableName, Authorizations.EMPTY));
+    Scanner csiScanner = new ClientSideIteratorScanner(conn.createScanner(tableName, Authorizations.EMPTY));
+    scanner.setSamplerConfiguration(SC1);
+    csiScanner.setSamplerConfiguration(SC1);
+    isoScanner.setSamplerConfiguration(SC1);
+    isoScanner.setBatchSize(10);
+
+    BatchScanner bScanner = conn.createBatchScanner(tableName, Authorizations.EMPTY, 2);
+    bScanner.setSamplerConfiguration(SC1);
+    bScanner.setRanges(Arrays.asList(new Range()));
+
+    check(expected, scanner, bScanner, isoScanner, csiScanner);
+
+    conn.tableOperations().flush(tableName, null, null, true);
+
+    Scanner oScanner = newOfflineScanner(conn, tableName, clone, SC1);
+    check(expected, scanner, bScanner, isoScanner, csiScanner, oScanner);
+
+    // ensure non-sample data can be scanned after scanning sample data
+    for (ScannerBase sb : Arrays.asList(scanner, bScanner, isoScanner, csiScanner, oScanner)) {
+      sb.clearSamplerConfiguration();
+      Assert.assertEquals(20000, Iterables.size(sb));
+      sb.setSamplerConfiguration(SC1);
+    }
+
+    Iterator<Key> it = expected.keySet().iterator();
+    while (it.hasNext()) {
+      Key k = it.next();
+      if (k.getRow().toString().equals(someRow)) {
+        it.remove();
+      }
+    }
+
+    expected.put(new Key(someRow, "cf1", "cq1", 8), new Value("42".getBytes()));
+    expected.put(new Key(someRow, "cf1", "cq3", 8), new Value("suprise".getBytes()));
+
+    Mutation m = new Mutation(someRow);
+
+    m.put("cf1", "cq1", 8, "42");
+    m.putDelete("cf1", "cq2", 8);
+    m.put("cf1", "cq3", 8, "suprise");
+
+    bw.addMutation(m);
+    bw.close();
+
+    check(expected, scanner, bScanner, isoScanner, csiScanner);
+
+    conn.tableOperations().flush(tableName, null, null, true);
+
+    oScanner = newOfflineScanner(conn, tableName, clone, SC1);
+    check(expected, scanner, bScanner, isoScanner, csiScanner, oScanner);
+
+    scanner.setRange(new Range(someRow));
+    isoScanner.setRange(new Range(someRow));
+    csiScanner.setRange(new Range(someRow));
+    oScanner.setRange(new Range(someRow));
+    bScanner.setRanges(Arrays.asList(new Range(someRow)));
+
+    expected.clear();
+
+    expected.put(new Key(someRow, "cf1", "cq1", 8), new Value("42".getBytes()));
+    expected.put(new Key(someRow, "cf1", "cq3", 8), new Value("suprise".getBytes()));
+
+    check(expected, scanner, bScanner, isoScanner, csiScanner, oScanner);
+
+    bScanner.close();
+  }
+
+  private Scanner newOfflineScanner(Connector conn, String tableName, String clone, SamplerConfiguration sc) throws Exception {
+    if (conn.tableOperations().exists(clone)) {
+      conn.tableOperations().delete(clone);
+    }
+    Map<String,String> em = Collections.emptyMap();
+    Set<String> es = Collections.emptySet();
+    conn.tableOperations().clone(tableName, clone, false, em, es);
+    conn.tableOperations().offline(clone, true);
+    String cloneID = conn.tableOperations().tableIdMap().get(clone);
+    OfflineScanner oScanner = new OfflineScanner(conn.getInstance(), new Credentials(getAdminPrincipal(), getAdminToken()), cloneID, Authorizations.EMPTY);
+    if (sc != null) {
+      oScanner.setSamplerConfiguration(sc);
+    }
+    return oScanner;
+  }
+
+  private void updateExpected(SamplerConfiguration sc, TreeMap<Key,Value> expected) {
+    expected.clear();
+
+    RowSampler sampler = new RowSampler();
+    sampler.init(sc);
+
+    for (int i = 0; i < 10000; i++) {
+      String row = String.format("r_%06d", i);
+
+      Key k1 = new Key(row, "cf1", "cq1", 7);
+      if (sampler.accept(k1)) {
+        expected.put(k1, new Value(("" + i).getBytes()));
+      }
+
+      Key k2 = new Key(row, "cf1", "cq2", 7);
+      if (sampler.accept(k2)) {
+        expected.put(k2, new Value(("" + (100000000 - i)).getBytes()));
+      }
+    }
+  }
+
+  private String writeData(BatchWriter bw, SamplerConfiguration sc, TreeMap<Key,Value> expected) throws MutationsRejectedException {
+    int count = 0;
+    String someRow = null;
+
+    RowSampler sampler = new RowSampler();
+    sampler.init(sc);
+
+    for (int i = 0; i < 10000; i++) {
+      String row = String.format("r_%06d", i);
+      Mutation m = new Mutation(row);
+
+      m.put("cf1", "cq1", 7, "" + i);
+      m.put("cf1", "cq2", 7, "" + (100000000 - i));
+
+      bw.addMutation(m);
+
+      Key k1 = new Key(row, "cf1", "cq1", 7);
+      if (sampler.accept(k1)) {
+        expected.put(k1, new Value(("" + i).getBytes()));
+        count++;
+        if (count == 5) {
+          someRow = row;
+        }
+      }
+
+      Key k2 = new Key(row, "cf1", "cq2", 7);
+      if (sampler.accept(k2)) {
+        expected.put(k2, new Value(("" + (100000000 - i)).getBytes()));
+      }
+    }
+
+    bw.flush();
+
+    return someRow;
+  }
+
+  private int countEntries(Iterable<Entry<Key,Value>> scanner) {
+
+    int count = 0;
+    Iterator<Entry<Key,Value>> iter = scanner.iterator();
+
+    while (iter.hasNext()) {
+      iter.next();
+      count++;
+    }
+
+    return count;
+  }
+
+  private void setRange(Range range, List<? extends ScannerBase> scanners) {
+    for (ScannerBase s : scanners) {
+      if (s instanceof Scanner) {
+        ((Scanner) s).setRange(range);
+      } else {
+        ((BatchScanner) s).setRanges(Collections.singleton(range));
+      }
+
+    }
+  }
+
+  @Test
+  public void testIterator() throws Exception {
+    Connector conn = getConnector();
+    String tableName = getUniqueNames(1)[0];
+    String clone = tableName + "_clone";
+
+    conn.tableOperations().create(tableName, new NewTableConfiguration().enableSampling(SC1));
+
+    BatchWriter bw = conn.createBatchWriter(tableName, new BatchWriterConfig());
+
+    TreeMap<Key,Value> expected = new TreeMap<Key,Value>();
+    writeData(bw, SC1, expected);
+
+    ArrayList<Key> keys = new ArrayList<>(expected.keySet());
+
+    Range range1 = new Range(keys.get(6), true, keys.get(11), true);
+
+    Scanner scanner = conn.createScanner(tableName, Authorizations.EMPTY);
+    Scanner isoScanner = new IsolatedScanner(conn.createScanner(tableName, Authorizations.EMPTY));
+    ClientSideIteratorScanner csiScanner = new ClientSideIteratorScanner(conn.createScanner(tableName, Authorizations.EMPTY));
+    BatchScanner bScanner = conn.createBatchScanner(tableName, Authorizations.EMPTY, 2);
+
+    csiScanner.setIteratorSamplerConfiguration(SC1);
+
+    List<? extends ScannerBase> scanners = Arrays.asList(scanner, isoScanner, bScanner, csiScanner);
+
+    for (ScannerBase s : scanners) {
+      s.addScanIterator(new IteratorSetting(100, IteratorThatUsesSample.class));
+    }
+
+    // the iterator should see fewer than 10 entries in the sample data, and return data
+    setRange(range1, scanners);
+    for (ScannerBase s : scanners) {
+      Assert.assertEquals(2954, countEntries(s));
+    }
+
+    Range range2 = new Range(keys.get(5), true, keys.get(18), true);
+    setRange(range2, scanners);
+
+    // the iterator should see 10 or more entries in the sample data, and return no data
+    for (ScannerBase s : scanners) {
+      Assert.assertEquals(0, countEntries(s));
+    }
+
+    // flush and rerun the same test against files
+    conn.tableOperations().flush(tableName, null, null, true);
+
+    Scanner oScanner = newOfflineScanner(conn, tableName, clone, null);
+    oScanner.addScanIterator(new IteratorSetting(100, IteratorThatUsesSample.class));
+    scanners = Arrays.asList(scanner, isoScanner, bScanner, csiScanner, oScanner);
+
+    setRange(range1, scanners);
+    for (ScannerBase s : scanners) {
+      Assert.assertEquals(2954, countEntries(s));
+    }
+
+    setRange(range2, scanners);
+    for (ScannerBase s : scanners) {
+      Assert.assertEquals(0, countEntries(s));
+    }
+
+    updateSamplingConfig(conn, tableName, SC2);
+
+    csiScanner.setIteratorSamplerConfiguration(SC2);
+
+    oScanner = newOfflineScanner(conn, tableName, clone, null);
+    oScanner.addScanIterator(new IteratorSetting(100, IteratorThatUsesSample.class));
+    scanners = Arrays.asList(scanner, isoScanner, bScanner, csiScanner, oScanner);
+
+    for (ScannerBase s : scanners) {
+      try {
+        countEntries(s);
+        Assert.fail("Expected SampleNotPresentException, but it did not happen : " + s.getClass().getSimpleName());
+      } catch (SampleNotPresentException e) {
+
+      }
+    }
+  }
+
+  private void setSamplerConfig(SamplerConfiguration sc, ScannerBase... scanners) {
+    for (ScannerBase s : scanners) {
+      s.setSamplerConfiguration(sc);
+    }
+  }
+
+  @Test
+  public void testSampleNotPresent() throws Exception {
+
+    Connector conn = getConnector();
+    String tableName = getUniqueNames(1)[0];
+    String clone = tableName + "_clone";
+
+    conn.tableOperations().create(tableName);
+
+    BatchWriter bw = conn.createBatchWriter(tableName, new BatchWriterConfig());
+
+    TreeMap<Key,Value> expected = new TreeMap<Key,Value>();
+    writeData(bw, SC1, expected);
+
+    Scanner scanner = conn.createScanner(tableName, Authorizations.EMPTY);
+    Scanner isoScanner = new IsolatedScanner(conn.createScanner(tableName, Authorizations.EMPTY));
+    isoScanner.setBatchSize(10);
+    Scanner csiScanner = new ClientSideIteratorScanner(conn.createScanner(tableName, Authorizations.EMPTY));
+    BatchScanner bScanner = conn.createBatchScanner(tableName, Authorizations.EMPTY, 2);
+    bScanner.setRanges(Arrays.asList(new Range()));
+
+    // ensure sample not present exception occurs when sampling is not configured
+    assertSampleNotPresent(SC1, scanner, isoScanner, bScanner, csiScanner);
+
+    conn.tableOperations().flush(tableName, null, null, true);
+
+    Scanner oScanner = newOfflineScanner(conn, tableName, clone, SC1);
+    assertSampleNotPresent(SC1, scanner, isoScanner, bScanner, csiScanner, oScanner);
+
+    // configure sampling; however, an rfile w/o sample data still exists... so we should still see a sample not present exception
+
+    updateSamplingConfig(conn, tableName, SC1);
+
+    // create clone with new config
+    oScanner = newOfflineScanner(conn, tableName, clone, SC1);
+
+    assertSampleNotPresent(SC1, scanner, isoScanner, bScanner, csiScanner, oScanner);
+
+    // create rfile with sample data present
+    conn.tableOperations().compact(tableName, new CompactionConfig().setWait(true));
+
+    // should be able to scan sample now
+    oScanner = newOfflineScanner(conn, tableName, clone, SC1);
+    setSamplerConfig(SC1, scanner, csiScanner, isoScanner, bScanner, oScanner);
+    check(expected, scanner, isoScanner, bScanner, csiScanner, oScanner);
+
+    // change sampling config
+    updateSamplingConfig(conn, tableName, SC2);
+
+    // create clone with new config
+    oScanner = newOfflineScanner(conn, tableName, clone, SC2);
+
+    // rfile should have different sample config than table, and scan should not work
+    assertSampleNotPresent(SC2, scanner, isoScanner, bScanner, csiScanner, oScanner);
+
+    // create rfiles whose sample data matches the table config
+    conn.tableOperations().compact(tableName, new CompactionConfig().setWait(true));
+
+    // should be able to scan sample now
+    updateExpected(SC2, expected);
+    oScanner = newOfflineScanner(conn, tableName, clone, SC2);
+    setSamplerConfig(SC2, scanner, csiScanner, isoScanner, bScanner, oScanner);
+    check(expected, scanner, isoScanner, bScanner, csiScanner, oScanner);
+
+    bScanner.close();
+  }
+
+  private void updateSamplingConfig(Connector conn, String tableName, SamplerConfiguration sc) throws TableNotFoundException, AccumuloException,
+      AccumuloSecurityException {
+    conn.tableOperations().setSamplerConfiguration(tableName, sc);
+    // wait for the config change to take effect
+    conn.tableOperations().offline(tableName, true);
+    conn.tableOperations().online(tableName, true);
+  }
+
+  private void assertSampleNotPresent(SamplerConfiguration sc, ScannerBase... scanners) {
+
+    for (ScannerBase scanner : scanners) {
+      SamplerConfiguration csc = scanner.getSamplerConfiguration();
+
+      scanner.setSamplerConfiguration(sc);
+
+      try {
+        for (Iterator<Entry<Key,Value>> i = scanner.iterator(); i.hasNext();) {
+          Entry<Key,Value> entry = i.next();
+          entry.getKey();
+        }
+        Assert.fail("Expected SampleNotPresentException, but it did not happen : " + scanner.getClass().getSimpleName());
+      } catch (SampleNotPresentException e) {
+
+      }
+
+      scanner.clearSamplerConfiguration();
+      for (Iterator<Entry<Key,Value>> i = scanner.iterator(); i.hasNext();) {
+        Entry<Key,Value> entry = i.next();
+        entry.getKey();
+      }
+
+      if (csc == null) {
+        scanner.clearSamplerConfiguration();
+      } else {
+        scanner.setSamplerConfiguration(csc);
+      }
+    }
+  }
+
+  private void check(TreeMap<Key,Value> expected, ScannerBase... scanners) {
+    TreeMap<Key,Value> actual = new TreeMap<>();
+    for (ScannerBase s : scanners) {
+      actual.clear();
+      for (Entry<Key,Value> entry : s) {
+        actual.put(entry.getKey(), entry.getValue());
+      }
+      Assert.assertEquals(String.format("Saw %d instead of %d entries using %s", actual.size(), expected.size(), s.getClass().getSimpleName()), expected,
+          actual);
+    }
+  }
+}

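SampleIT exercises the new API end to end; a minimal sketch of just the configuration calls it relies on (connector acquisition and table names assumed):

    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.core.client.admin.CompactionConfig;
    import org.apache.accumulo.core.client.admin.NewTableConfiguration;
    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.sample.RowSampler;

    static void configureSampling(Connector conn) throws Exception {
      // hash rows with murmur3_32 and keep roughly 1 in 1009 rows in the sample
      SamplerConfiguration sc = new SamplerConfiguration(RowSampler.class.getName())
          .addOption("hasher", "murmur3_32").addOption("modulus", "1009");

      // enable sampling at table creation time ...
      conn.tableOperations().create("ex1", new NewTableConfiguration().enableSampling(sc));

      // ... or on an existing table, then compact so every rfile carries sample data
      conn.tableOperations().setSamplerConfiguration("ex2", sc);
      conn.tableOperations().compact("ex2", new CompactionConfig().setWait(true));
    }
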
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/test/src/main/java/org/apache/accumulo/test/ShellServerIT.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/ShellServerIT.java b/test/src/main/java/org/apache/accumulo/test/ShellServerIT.java
index e7b5799..ae38fb8 100644
--- a/test/src/main/java/org/apache/accumulo/test/ShellServerIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/ShellServerIT.java
@@ -41,8 +41,6 @@ import java.util.Map.Entry;
 import java.util.Random;
 import java.util.concurrent.TimeUnit;
 
-import jline.console.ConsoleReader;
-
 import org.apache.accumulo.core.Constants;
 import org.apache.accumulo.core.client.ClientConfiguration;
 import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty;
@@ -91,6 +89,8 @@ import org.slf4j.LoggerFactory;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Iterators;
 
+import jline.console.ConsoleReader;
+
 public class ShellServerIT extends SharedMiniClusterBase {
   public static class TestOutputStream extends OutputStream {
     StringBuilder sb = new StringBuilder();
@@ -975,6 +975,26 @@ public class ShellServerIT extends SharedMiniClusterBase {
     ts.exec("compact -t " + clone + " -w --sf-ename F.* --sf-lt-esize 1K");
 
     assertEquals(3, countFiles(cloneId));
+
+    String clone2 = table + "_clone_2";
+    ts.exec("clonetable -s table.sampler.opt.hasher=murmur3_32,table.sampler.opt.modulus=7,table.sampler=org.apache.accumulo.core.sample.RowSampler " + clone
+        + " " + clone2);
+    String clone2Id = getTableId(clone2);
+
+    assertEquals(3, countFiles(clone2Id));
+
+    ts.exec("table " + clone2);
+    ts.exec("insert v n l o");
+    ts.exec("flush -w");
+
+    ts.exec("insert x n l o");
+    ts.exec("flush -w");
+
+    assertEquals(5, countFiles(clone2Id));
+
+    ts.exec("compact -t " + clone2 + " -w --sf-no-sample");
+
+    assertEquals(3, countFiles(clone2Id));
   }
 
   @Test
@@ -989,6 +1009,54 @@ public class ShellServerIT extends SharedMiniClusterBase {
   }
 
   @Test
+  public void testScanSample() throws Exception {
+    final String table = name.getMethodName();
+
+    // create a table and insert some data
+    ts.exec("createtable " + table);
+
+    ts.exec("insert 9255 doc content 'abcde'");
+    ts.exec("insert 9255 doc url file://foo.txt");
+    ts.exec("insert 8934 doc content 'accumulo scales'");
+    ts.exec("insert 8934 doc url file://accumulo_notes.txt");
+    ts.exec("insert 2317 doc content 'milk, eggs, bread, parmigiano-reggiano'");
+    ts.exec("insert 2317 doc url file://groceries/9.txt");
+    ts.exec("insert 3900 doc content 'EC2 ate my homework'");
+    ts.exec("insert 3900 doc uril file://final_project.txt");
+
+    String clone1 = table + "_clone_1";
+    ts.exec("clonetable -s table.sampler.opt.hasher=murmur3_32,table.sampler.opt.modulus=3,table.sampler=org.apache.accumulo.core.sample.RowSampler " + table
+        + " " + clone1);
+
+    ts.exec("compact -t " + clone1 + " -w --sf-no-sample");
+
+    ts.exec("table " + clone1);
+    ts.exec("scan --sample", true, "parmigiano-reggiano", true);
+    ts.exec("grep --sample reg", true, "parmigiano-reggiano", true);
+    ts.exec("scan --sample", true, "accumulo", false);
+    ts.exec("grep --sample acc", true, "accumulo", false);
+
+    // create a table whose sampler config differs from what's in its files
+    String clone2 = table + "_clone_2";
+    ts.exec("clonetable -s table.sampler.opt.hasher=murmur3_32,table.sampler.opt.modulus=2,table.sampler=org.apache.accumulo.core.sample.RowSampler " + clone1
+        + " " + clone2);
+
+    ts.exec("table " + clone2);
+    ts.exec("scan --sample", false, "SampleNotPresentException", true);
+    ts.exec("grep --sample reg", false, "SampleNotPresentException", true);
+
+    ts.exec("compact -t " + clone2 + " -w --sf-no-sample");
+
+    for (String expected : Arrays.asList("2317", "3900", "9255")) {
+      ts.exec("scan --sample", true, expected, true);
+      ts.exec("grep --sample " + expected.substring(0, 2), true, expected, true);
+    }
+
+    ts.exec("scan --sample", true, "8934", false);
+    ts.exec("grep --sample 89", true, "8934", false);
+  }
+
+  @Test
   public void constraint() throws Exception {
     final String table = name.getMethodName();
 

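The clonetable -s trick above seeds sampler properties at clone time; the same properties can also be set directly on a table, assuming the shell's usual config -s syntax for table properties:

    config -t ex -s table.sampler=org.apache.accumulo.core.sample.RowSampler
    config -t ex -s table.sampler.opt.hasher=murmur3_32
    config -t ex -s table.sampler.opt.modulus=3
    compact -t ex -w --sf-no-sample
    scan -t ex --sample
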
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/test/src/main/java/org/apache/accumulo/test/functional/ExamplesIT.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/functional/ExamplesIT.java b/test/src/main/java/org/apache/accumulo/test/functional/ExamplesIT.java
index 71ddbcd..826907c 100644
--- a/test/src/main/java/org/apache/accumulo/test/functional/ExamplesIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/functional/ExamplesIT.java
@@ -17,6 +17,7 @@
 package org.apache.accumulo.test.functional;
 
 import static com.google.common.base.Charsets.UTF_8;
+import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
@@ -102,7 +103,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.Iterators;
-import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
 
 public class ExamplesIT extends AccumuloClusterHarness {
   private static final Logger log = LoggerFactory.getLogger(ExamplesIT.class);
@@ -390,7 +390,7 @@ public class ExamplesIT extends AccumuloClusterHarness {
     Index.index(30, src, "\\W+", bw);
     bw.close();
     BatchScanner bs = c.createBatchScanner(shard, Authorizations.EMPTY, 4);
-    List<String> found = Query.query(bs, Arrays.asList("foo", "bar"));
+    List<String> found = Query.query(bs, Arrays.asList("foo", "bar"), null);
     bs.close();
     // should find ourselves
     boolean thisFile = false;

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/test/src/main/java/org/apache/accumulo/test/functional/ReadWriteIT.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/functional/ReadWriteIT.java b/test/src/main/java/org/apache/accumulo/test/functional/ReadWriteIT.java
index 485d6d2..3098251 100644
--- a/test/src/main/java/org/apache/accumulo/test/functional/ReadWriteIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/functional/ReadWriteIT.java
@@ -430,8 +430,8 @@ public class ReadWriteIT extends AccumuloClusterHarness {
         PrintInfo.main(args.toArray(new String[args.size()]));
         newOut.flush();
         String stdout = baos.toString();
-        assertTrue(stdout.contains("Locality group         : g1"));
-        assertTrue(stdout.contains("families      : [colf]"));
+        assertTrue(stdout.contains("Locality group           : g1"));
+        assertTrue(stdout.contains("families        : [colf]"));
       } finally {
         newOut.close();
         System.setOut(oldOut);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/test/src/main/java/org/apache/accumulo/test/mapred/AccumuloFileOutputFormatIT.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/mapred/AccumuloFileOutputFormatIT.java b/test/src/main/java/org/apache/accumulo/test/mapred/AccumuloFileOutputFormatIT.java
index 7a4223d..dd085cc 100644
--- a/test/src/main/java/org/apache/accumulo/test/mapred/AccumuloFileOutputFormatIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/mapred/AccumuloFileOutputFormatIT.java
@@ -30,15 +30,23 @@ import java.io.IOException;
 import org.apache.accumulo.core.client.BatchWriter;
 import org.apache.accumulo.core.client.BatchWriterConfig;
 import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapred.AccumuloFileOutputFormat;
 import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase;
+import org.apache.accumulo.core.conf.DefaultConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.file.FileSKVIterator;
+import org.apache.accumulo.core.file.rfile.RFileOperations;
+import org.apache.accumulo.core.sample.RowSampler;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
+import org.apache.accumulo.core.util.CachedConfiguration;
 import org.apache.accumulo.harness.AccumuloClusterHarness;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
@@ -65,6 +73,9 @@ public class AccumuloFileOutputFormatIT extends AccumuloClusterHarness {
   private static AssertionError e1 = null;
   private static AssertionError e2 = null;
 
+  private static final SamplerConfiguration SAMPLER_CONFIG = new SamplerConfiguration(RowSampler.class.getName()).addOption("hasher", "murmur3_32").addOption(
+      "modulus", "3");
+
   @Rule
   public TemporaryFolder folder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
 
@@ -141,6 +152,7 @@ public class AccumuloFileOutputFormatIT extends AccumuloClusterHarness {
       AccumuloInputFormat.setConnectorInfo(job, getAdminPrincipal(), getAdminToken());
       AccumuloInputFormat.setInputTableName(job, table);
       AccumuloFileOutputFormat.setOutputPath(job, new Path(args[1]));
+      AccumuloFileOutputFormat.setSampler(job, SAMPLER_CONFIG);
 
       job.setMapperClass(BAD_TABLE.equals(table) ? BadKeyMapper.class : IdentityMapper.class);
       job.setMapOutputKeyClass(Key.class);
@@ -177,6 +189,12 @@ public class AccumuloFileOutputFormatIT extends AccumuloClusterHarness {
     if (content) {
       assertEquals(1, files.length);
       assertTrue(files[0].exists());
+
+      Configuration conf = CachedConfiguration.getInstance();
+      DefaultConfiguration acuconf = DefaultConfiguration.getInstance();
+      FileSKVIterator sample = RFileOperations.getInstance().openReader(files[0].toString(), false, FileSystem.get(conf), conf, acuconf)
+          .getSample(new SamplerConfigurationImpl(SAMPLER_CONFIG));
+      assertNotNull(sample);
     } else {
       assertEquals(0, files.length);
     }

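A minimal sketch of the job-setup half of the change above, i.e. asking AccumuloFileOutputFormat to write rfiles that already contain sample data (output path and the remaining job wiring are assumed):

    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.client.mapred.AccumuloFileOutputFormat;
    import org.apache.accumulo.core.sample.RowSampler;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.JobConf;

    static void configureOutput(JobConf job) {
      AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp/mr_out"));
      // rfiles written with a sampler matching the table config can later be
      // scanned with --sample without needing a --sf-no-sample compaction
      AccumuloFileOutputFormat.setSampler(job,
          new SamplerConfiguration(RowSampler.class.getName())
              .addOption("hasher", "murmur3_32").addOption("modulus", "3"));
    }
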
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/test/src/main/java/org/apache/accumulo/test/mapred/AccumuloInputFormatIT.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/mapred/AccumuloInputFormatIT.java b/test/src/main/java/org/apache/accumulo/test/mapred/AccumuloInputFormatIT.java
index 2cef382..cd80139 100644
--- a/test/src/main/java/org/apache/accumulo/test/mapred/AccumuloInputFormatIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/mapred/AccumuloInputFormatIT.java
@@ -27,11 +27,14 @@ import java.util.Collections;
 import org.apache.accumulo.core.client.BatchWriter;
 import org.apache.accumulo.core.client.BatchWriterConfig;
 import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
 import org.apache.accumulo.core.client.mapred.RangeInputSplit;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.sample.RowSampler;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.Pair;
 import org.apache.accumulo.harness.AccumuloClusterHarness;
@@ -60,7 +63,9 @@ public class AccumuloInputFormatIT extends AccumuloClusterHarness {
   }
 
   private static AssertionError e1 = null;
+  private static int e1Count = 0;
   private static AssertionError e2 = null;
+  private static int e2Count = 0;
 
   private static class MRTester extends Configured implements Tool {
     private static class TestMapper implements Mapper<Key,Value,Key,Value> {
@@ -76,6 +81,7 @@ public class AccumuloInputFormatIT extends AccumuloClusterHarness {
           assertEquals(new String(v.get()), String.format("%09x", count));
         } catch (AssertionError e) {
           e1 = e;
+          e1Count++;
         }
         key = new Key(k);
         count++;
@@ -90,6 +96,7 @@ public class AccumuloInputFormatIT extends AccumuloClusterHarness {
           assertEquals(100, count);
         } catch (AssertionError e) {
           e2 = e;
+          e2Count++;
         }
       }
 
@@ -98,11 +105,17 @@ public class AccumuloInputFormatIT extends AccumuloClusterHarness {
     @Override
     public int run(String[] args) throws Exception {
 
-      if (args.length != 1) {
-        throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <table>");
+      if (args.length != 1 && args.length != 3) {
+        throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <table> [<batchScan> <scan sample>]");
       }
 
       String table = args[0];
+      Boolean batchScan = false;
+      boolean sample = false;
+      if (args.length == 3) {
+        batchScan = Boolean.parseBoolean(args[1]);
+        sample = Boolean.parseBoolean(args[2]);
+      }
 
       JobConf job = new JobConf(getConf());
       job.setJarByClass(this.getClass());
@@ -112,6 +125,10 @@ public class AccumuloInputFormatIT extends AccumuloClusterHarness {
       AccumuloInputFormat.setConnectorInfo(job, getAdminPrincipal(), getAdminToken());
       AccumuloInputFormat.setInputTableName(job, table);
       AccumuloInputFormat.setZooKeeperInstance(job, getCluster().getClientConfig());
+      AccumuloInputFormat.setBatchScan(job, batchScan);
+      if (sample) {
+        AccumuloInputFormat.setSamplerConfiguration(job, SAMPLER_CONFIG);
+      }
 
       job.setMapperClass(TestMapper.class);
       job.setMapOutputKeyClass(Key.class);
@@ -143,11 +160,47 @@ public class AccumuloInputFormatIT extends AccumuloClusterHarness {
     }
     bw.close();
 
+    e1 = null;
+    e2 = null;
+
     MRTester.main(table);
     assertNull(e1);
     assertNull(e2);
   }
 
+  private static final SamplerConfiguration SAMPLER_CONFIG = new SamplerConfiguration(RowSampler.class.getName()).addOption("hasher", "murmur3_32").addOption(
+      "modulus", "3");
+
+  @Test
+  public void testSample() throws Exception {
+    final String TEST_TABLE_3 = getUniqueNames(1)[0];
+
+    Connector c = getConnector();
+    c.tableOperations().create(TEST_TABLE_3, new NewTableConfiguration().enableSampling(SAMPLER_CONFIG));
+    BatchWriter bw = c.createBatchWriter(TEST_TABLE_3, new BatchWriterConfig());
+    for (int i = 0; i < 100; i++) {
+      Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+      m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+      bw.addMutation(m);
+    }
+    bw.close();
+
+    MRTester.main(TEST_TABLE_3, "False", "True");
+    Assert.assertEquals(38, e1Count);
+    Assert.assertEquals(1, e2Count);
+
+    e2Count = e1Count = 0;
+    MRTester.main(TEST_TABLE_3, "False", "False");
+    Assert.assertEquals(0, e1Count);
+    Assert.assertEquals(0, e2Count);
+
+    e2Count = e1Count = 0;
+    MRTester.main(TEST_TABLE_3, "True", "True");
+    Assert.assertEquals(38, e1Count);
+    Assert.assertEquals(1, e2Count);
+
+  }
+
   @Test
   public void testCorrectRangeInputSplits() throws Exception {
     JobConf job = new JobConf();

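And the input side: a minimal sketch of enabling sample scans for a mapred job (connector info and the rest of the job setup are assumed). The sampler given here must match what the rfiles were written with, otherwise the scan fails with SampleNotPresentException, as the test above demonstrates:

    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
    import org.apache.accumulo.core.sample.RowSampler;
    import org.apache.hadoop.mapred.JobConf;

    static void configureInput(JobConf job) {
      AccumuloInputFormat.setInputTableName(job, "ex");
      // read only the sample data instead of the full table
      AccumuloInputFormat.setSamplerConfiguration(job,
          new SamplerConfiguration(RowSampler.class.getName())
              .addOption("hasher", "murmur3_32").addOption("modulus", "3"));
    }
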
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/test/src/main/java/org/apache/accumulo/test/mapreduce/AccumuloFileOutputFormatIT.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/mapreduce/AccumuloFileOutputFormatIT.java b/test/src/main/java/org/apache/accumulo/test/mapreduce/AccumuloFileOutputFormatIT.java
index 8f53378..d00a9b3 100644
--- a/test/src/main/java/org/apache/accumulo/test/mapreduce/AccumuloFileOutputFormatIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/mapreduce/AccumuloFileOutputFormatIT.java
@@ -27,14 +27,22 @@ import java.io.IOException;
 import org.apache.accumulo.core.client.BatchWriter;
 import org.apache.accumulo.core.client.BatchWriterConfig;
 import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
 import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.conf.DefaultConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.file.FileSKVIterator;
+import org.apache.accumulo.core.file.rfile.RFileOperations;
+import org.apache.accumulo.core.sample.RowSampler;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
+import org.apache.accumulo.core.util.CachedConfiguration;
 import org.apache.accumulo.harness.AccumuloClusterHarness;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -55,6 +63,9 @@ public class AccumuloFileOutputFormatIT extends AccumuloClusterHarness {
   private String TEST_TABLE;
   private String EMPTY_TABLE;
 
+  private static final SamplerConfiguration SAMPLER_CONFIG = new SamplerConfiguration(RowSampler.class.getName()).addOption("hasher", "murmur3_32").addOption(
+      "modulus", "3");
+
   @Override
   protected int defaultTimeoutSeconds() {
     return 4 * 60;
@@ -152,6 +163,7 @@ public class AccumuloFileOutputFormatIT extends AccumuloClusterHarness {
       AccumuloInputFormat.setInputTableName(job, table);
       AccumuloInputFormat.setZooKeeperInstance(job, getCluster().getClientConfig());
       AccumuloFileOutputFormat.setOutputPath(job, new Path(args[1]));
+      AccumuloFileOutputFormat.setSampler(job, SAMPLER_CONFIG);
 
       job.setMapperClass(table.endsWith("_mapreduce_bad_table") ? BadKeyMapper.class : Mapper.class);
       job.setMapOutputKeyClass(Key.class);
@@ -189,6 +201,12 @@ public class AccumuloFileOutputFormatIT extends AccumuloClusterHarness {
     if (content) {
       assertEquals(1, files.length);
       assertTrue(files[0].exists());
+
+      Configuration conf = CachedConfiguration.getInstance();
+      DefaultConfiguration acuconf = DefaultConfiguration.getInstance();
+      FileSKVIterator sample = RFileOperations.getInstance().openReader(files[0].toString(), false, FileSystem.get(conf), conf, acuconf)
+          .getSample(new SamplerConfigurationImpl(SAMPLER_CONFIG));
+      assertNotNull(sample);
     } else {
       assertEquals(0, files.length);
     }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/test/src/main/java/org/apache/accumulo/test/mapreduce/AccumuloInputFormatIT.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/mapreduce/AccumuloInputFormatIT.java b/test/src/main/java/org/apache/accumulo/test/mapreduce/AccumuloInputFormatIT.java
index 1ca4f92..0a5bd68 100644
--- a/test/src/main/java/org/apache/accumulo/test/mapreduce/AccumuloInputFormatIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/mapreduce/AccumuloInputFormatIT.java
@@ -39,6 +39,8 @@ import org.apache.accumulo.core.client.ClientConfiguration;
 import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty;
 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
 import org.apache.accumulo.core.client.mapreduce.RangeInputSplit;
 import org.apache.accumulo.core.client.mapreduce.impl.BatchInputSplit;
@@ -51,6 +53,7 @@ import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.sample.RowSampler;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.Pair;
 import org.apache.accumulo.harness.AccumuloClusterHarness;
@@ -270,15 +273,18 @@ public class AccumuloInputFormatIT extends AccumuloClusterHarness {
     @Override
     public int run(String[] args) throws Exception {
 
-      if (args.length != 2 && args.length != 3) {
-        throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <table> <inputFormatClass> [<batchScan>]");
+      if (args.length != 2 && args.length != 4) {
+        throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <table> <inputFormatClass> [<batchScan> <scan sample>]");
       }
 
       String table = args[0];
       String inputFormatClassName = args[1];
       Boolean batchScan = false;
-      if (args.length == 3)
+      boolean sample = false;
+      if (args.length == 4) {
         batchScan = Boolean.parseBoolean(args[2]);
+        sample = Boolean.parseBoolean(args[3]);
+      }
 
       assertionErrors.put(table + "_map", new AssertionError("Dummy_map"));
       assertionErrors.put(table + "_cleanup", new AssertionError("Dummy_cleanup"));
@@ -296,6 +302,9 @@ public class AccumuloInputFormatIT extends AccumuloClusterHarness {
       AccumuloInputFormat.setConnectorInfo(job, getAdminPrincipal(), getAdminToken());
       AccumuloInputFormat.setInputTableName(job, table);
       AccumuloInputFormat.setBatchScan(job, batchScan);
+      if (sample) {
+        AccumuloInputFormat.setSamplerConfiguration(job, SAMPLER_CONFIG);
+      }
 
       job.setMapperClass(TestMapper.class);
       job.setMapOutputKeyClass(Key.class);
@@ -335,6 +344,38 @@ public class AccumuloInputFormatIT extends AccumuloClusterHarness {
     assertEquals(1, assertionErrors.get(TEST_TABLE_1 + "_cleanup").size());
   }
 
+  private static final SamplerConfiguration SAMPLER_CONFIG = new SamplerConfiguration(RowSampler.class.getName()).addOption("hasher", "murmur3_32").addOption(
+      "modulus", "3");
+
+  @Test
+  public void testSample() throws Exception {
+    final String TEST_TABLE_3 = getUniqueNames(1)[0];
+
+    Connector c = getConnector();
+    c.tableOperations().create(TEST_TABLE_3, new NewTableConfiguration().enableSampling(SAMPLER_CONFIG));
+    BatchWriter bw = c.createBatchWriter(TEST_TABLE_3, new BatchWriterConfig());
+    for (int i = 0; i < 100; i++) {
+      Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+      m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+      bw.addMutation(m);
+    }
+    bw.close();
+
+    Assert.assertEquals(0, MRTester.main(new String[] {TEST_TABLE_3, AccumuloInputFormat.class.getName(), "False", "True"}));
+    assertEquals(39, assertionErrors.get(TEST_TABLE_3 + "_map").size());
+    assertEquals(2, assertionErrors.get(TEST_TABLE_3 + "_cleanup").size());
+
+    assertionErrors.clear();
+    Assert.assertEquals(0, MRTester.main(new String[] {TEST_TABLE_3, AccumuloInputFormat.class.getName(), "False", "False"}));
+    assertEquals(1, assertionErrors.get(TEST_TABLE_3 + "_map").size());
+    assertEquals(1, assertionErrors.get(TEST_TABLE_3 + "_cleanup").size());
+
+    assertionErrors.clear();
+    Assert.assertEquals(0, MRTester.main(new String[] {TEST_TABLE_3, AccumuloInputFormat.class.getName(), "True", "True"}));
+    assertEquals(39, assertionErrors.get(TEST_TABLE_3 + "_map").size());
+    assertEquals(2, assertionErrors.get(TEST_TABLE_3 + "_cleanup").size());
+  }
+
   @Test
   public void testMapWithBatchScanner() throws Exception {
     final String TEST_TABLE_2 = getUniqueNames(1)[0];
@@ -349,7 +390,7 @@ public class AccumuloInputFormatIT extends AccumuloClusterHarness {
     }
     bw.close();
 
-    Assert.assertEquals(0, MRTester.main(new String[] {TEST_TABLE_2, AccumuloInputFormat.class.getName(), "True"}));
+    Assert.assertEquals(0, MRTester.main(new String[] {TEST_TABLE_2, AccumuloInputFormat.class.getName(), "True", "False"}));
     assertEquals(1, assertionErrors.get(TEST_TABLE_2 + "_map").size());
     assertEquals(1, assertionErrors.get(TEST_TABLE_2 + "_cleanup").size());
   }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/test/src/main/java/org/apache/accumulo/test/performance/thrift/NullTserver.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/performance/thrift/NullTserver.java b/test/src/main/java/org/apache/accumulo/test/performance/thrift/NullTserver.java
index ef05f37..559703f 100644
--- a/test/src/main/java/org/apache/accumulo/test/performance/thrift/NullTserver.java
+++ b/test/src/main/java/org/apache/accumulo/test/performance/thrift/NullTserver.java
@@ -55,6 +55,7 @@ import org.apache.accumulo.core.tabletserver.thrift.ActiveCompaction;
 import org.apache.accumulo.core.tabletserver.thrift.ActiveScan;
 import org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException;
 import org.apache.accumulo.core.tabletserver.thrift.TDurability;
+import org.apache.accumulo.core.tabletserver.thrift.TSamplerConfiguration;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Iface;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Processor;
@@ -77,6 +78,7 @@ import org.apache.thrift.TException;
 
 import com.beust.jcommander.Parameter;
 import com.google.common.net.HostAndPort;
+
 import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
 
 /**
@@ -136,14 +138,14 @@ public class NullTserver {
 
     @Override
     public InitialMultiScan startMultiScan(TInfo tinfo, TCredentials credentials, Map<TKeyExtent,List<TRange>> batch, List<TColumn> columns,
-        List<IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, long batchTimeOut) {
+        List<IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, TSamplerConfiguration tsc, long batchTimeOut) {
       return null;
     }
 
     @Override
     public InitialScan startScan(TInfo tinfo, TCredentials credentials, TKeyExtent extent, TRange range, List<TColumn> columns, int batchSize,
         List<IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated,
-        long readaheadThreshold, long batchTimeOut) {
+        long readaheadThreshold, TSamplerConfiguration tsc, long batchTimeOut) {
       return null;
     }
 


[4/7] accumulo git commit: ACCUMULO-3913 Added per table sampling

Posted by kt...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java b/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java
index bd0f79c..f453788 100644
--- a/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java
+++ b/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TabletClientService.java
@@ -52,15 +52,15 @@ import org.slf4j.LoggerFactory;
 
   public interface Iface extends org.apache.accumulo.core.client.impl.thrift.ClientService.Iface {
 
-    public org.apache.accumulo.core.data.thrift.InitialScan startScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, long batchTimeOut) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, NotServingTabletException, TooManyFilesException, org.apache.thrift.TException;
+    public org.apache.accumulo.core.data.thrift.InitialScan startScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, TSamplerConfiguration samplerConfig, long batchTimeOut) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, NotServingTabletException, TooManyFilesException, TSampleNotPresentException, org.apache.thrift.TException;
 
-    public org.apache.accumulo.core.data.thrift.ScanResult continueScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID) throws NoSuchScanIDException, NotServingTabletException, TooManyFilesException, org.apache.thrift.TException;
+    public org.apache.accumulo.core.data.thrift.ScanResult continueScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID) throws NoSuchScanIDException, NotServingTabletException, TooManyFilesException, TSampleNotPresentException, org.apache.thrift.TException;
 
     public void closeScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID) throws org.apache.thrift.TException;
 
-    public org.apache.accumulo.core.data.thrift.InitialMultiScan startMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, long batchTimeOut) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, org.apache.thrift.TException;
+    public org.apache.accumulo.core.data.thrift.InitialMultiScan startMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, TSamplerConfiguration samplerConfig, long batchTimeOut) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, TSampleNotPresentException, org.apache.thrift.TException;
 
-    public org.apache.accumulo.core.data.thrift.MultiScanResult continueMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID) throws NoSuchScanIDException, org.apache.thrift.TException;
+    public org.apache.accumulo.core.data.thrift.MultiScanResult continueMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID) throws NoSuchScanIDException, TSampleNotPresentException, org.apache.thrift.TException;
 
     public void closeMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID) throws NoSuchScanIDException, org.apache.thrift.TException;
 
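For illustration, a hedged sketch of a caller using the widened synchronous
interface above. Only the startScan signature and the new
TSampleNotPresentException come from this diff; the wrapper class, parameter
values, and fallback policy below are hypothetical:

    import java.nio.ByteBuffer;
    import java.util.List;
    import java.util.Map;

    import org.apache.accumulo.core.data.thrift.InitialScan;
    import org.apache.accumulo.core.data.thrift.IterInfo;
    import org.apache.accumulo.core.data.thrift.TColumn;
    import org.apache.accumulo.core.data.thrift.TKeyExtent;
    import org.apache.accumulo.core.data.thrift.TRange;
    import org.apache.accumulo.core.security.thrift.TCredentials;
    import org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException;
    import org.apache.accumulo.core.tabletserver.thrift.TSamplerConfiguration;
    import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
    import org.apache.accumulo.core.trace.thrift.TInfo;
    import org.apache.thrift.TException;

    class SampleScanSketch {
      // Hypothetical wrapper: scans sample data, falling back to a full scan
      // when the tablet has no sample matching the requested configuration.
      static InitialScan scan(TabletClientService.Iface client, TInfo tinfo,
          TCredentials creds, TKeyExtent extent, TRange range, List<TColumn> cols,
          List<IterInfo> ssiList, Map<String,Map<String,String>> ssio,
          List<ByteBuffer> auths, TSamplerConfiguration samplerConfig)
          throws TException {
        try {
          // batchSize, flags, and readaheadThreshold are arbitrary values here;
          // samplerConfig slots in just before batchTimeOut.
          return client.startScan(tinfo, creds, extent, range, cols, 1000,
              ssiList, ssio, auths, false, false, 3, samplerConfig, 0);
        } catch (TSampleNotPresentException e) {
          // New failure mode added by this change: retry without sampling.
          return client.startScan(tinfo, creds, extent, range, cols, 1000,
              ssiList, ssio, auths, false, false, 3, null, 0);
        }
      }
    }

A null samplerConfig keeps the pre-sampling behavior: the generated argument
struct treats null as "field not set" (see the accessors further down in this
diff), so nothing extra goes over the wire.
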
@@ -118,13 +118,13 @@ import org.slf4j.LoggerFactory;
 
   public interface AsyncIface extends org.apache.accumulo.core.client.impl.thrift.ClientService .AsyncIface {
 
-    public void startScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException;
+    public void startScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, TSamplerConfiguration samplerConfig, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException;
 
     public void continueScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException;
 
     public void closeScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException;
 
-    public void startMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException;
+    public void startMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, TSamplerConfiguration samplerConfig, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException;
 
     public void continueMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException;
 
@@ -202,13 +202,13 @@ import org.slf4j.LoggerFactory;
       super(iprot, oprot);
     }
 
-    public org.apache.accumulo.core.data.thrift.InitialScan startScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, long batchTimeOut) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, NotServingTabletException, TooManyFilesException, org.apache.thrift.TException
+    public org.apache.accumulo.core.data.thrift.InitialScan startScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, TSamplerConfiguration samplerConfig, long batchTimeOut) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, NotServingTabletException, TooManyFilesException, TSampleNotPresentException, org.apache.thrift.TException
     {
-      send_startScan(tinfo, credentials, extent, range, columns, batchSize, ssiList, ssio, authorizations, waitForWrites, isolated, readaheadThreshold, batchTimeOut);
+      send_startScan(tinfo, credentials, extent, range, columns, batchSize, ssiList, ssio, authorizations, waitForWrites, isolated, readaheadThreshold, samplerConfig, batchTimeOut);
       return recv_startScan();
     }
 
-    public void send_startScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, long batchTimeOut) throws org.apache.thrift.TException
+    public void send_startScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, TSamplerConfiguration samplerConfig, long batchTimeOut) throws org.apache.thrift.TException
     {
       startScan_args args = new startScan_args();
       args.setTinfo(tinfo);
@@ -223,11 +223,12 @@ import org.slf4j.LoggerFactory;
       args.setWaitForWrites(waitForWrites);
       args.setIsolated(isolated);
       args.setReadaheadThreshold(readaheadThreshold);
+      args.setSamplerConfig(samplerConfig);
       args.setBatchTimeOut(batchTimeOut);
       sendBase("startScan", args);
     }
 
-    public org.apache.accumulo.core.data.thrift.InitialScan recv_startScan() throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, NotServingTabletException, TooManyFilesException, org.apache.thrift.TException
+    public org.apache.accumulo.core.data.thrift.InitialScan recv_startScan() throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, NotServingTabletException, TooManyFilesException, TSampleNotPresentException, org.apache.thrift.TException
     {
       startScan_result result = new startScan_result();
       receiveBase(result, "startScan");
@@ -243,10 +244,13 @@ import org.slf4j.LoggerFactory;
       if (result.tmfe != null) {
         throw result.tmfe;
       }
+      if (result.tsnpe != null) {
+        throw result.tsnpe;
+      }
       throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "startScan failed: unknown result");
     }
 
-    public org.apache.accumulo.core.data.thrift.ScanResult continueScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID) throws NoSuchScanIDException, NotServingTabletException, TooManyFilesException, org.apache.thrift.TException
+    public org.apache.accumulo.core.data.thrift.ScanResult continueScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID) throws NoSuchScanIDException, NotServingTabletException, TooManyFilesException, TSampleNotPresentException, org.apache.thrift.TException
     {
       send_continueScan(tinfo, scanID);
       return recv_continueScan();
@@ -260,7 +264,7 @@ import org.slf4j.LoggerFactory;
       sendBase("continueScan", args);
     }
 
-    public org.apache.accumulo.core.data.thrift.ScanResult recv_continueScan() throws NoSuchScanIDException, NotServingTabletException, TooManyFilesException, org.apache.thrift.TException
+    public org.apache.accumulo.core.data.thrift.ScanResult recv_continueScan() throws NoSuchScanIDException, NotServingTabletException, TooManyFilesException, TSampleNotPresentException, org.apache.thrift.TException
     {
       continueScan_result result = new continueScan_result();
       receiveBase(result, "continueScan");
@@ -276,6 +280,9 @@ import org.slf4j.LoggerFactory;
       if (result.tmfe != null) {
         throw result.tmfe;
       }
+      if (result.tsnpe != null) {
+        throw result.tsnpe;
+      }
       throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "continueScan failed: unknown result");
     }
 
@@ -292,13 +299,13 @@ import org.slf4j.LoggerFactory;
       sendBase("closeScan", args);
     }
 
-    public org.apache.accumulo.core.data.thrift.InitialMultiScan startMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, long batchTimeOut) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, org.apache.thrift.TException
+    public org.apache.accumulo.core.data.thrift.InitialMultiScan startMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, TSamplerConfiguration samplerConfig, long batchTimeOut) throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, TSampleNotPresentException, org.apache.thrift.TException
     {
-      send_startMultiScan(tinfo, credentials, batch, columns, ssiList, ssio, authorizations, waitForWrites, batchTimeOut);
+      send_startMultiScan(tinfo, credentials, batch, columns, ssiList, ssio, authorizations, waitForWrites, samplerConfig, batchTimeOut);
       return recv_startMultiScan();
     }
 
-    public void send_startMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, long batchTimeOut) throws org.apache.thrift.TException
+    public void send_startMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, TSamplerConfiguration samplerConfig, long batchTimeOut) throws org.apache.thrift.TException
     {
       startMultiScan_args args = new startMultiScan_args();
       args.setTinfo(tinfo);
@@ -309,11 +316,12 @@ import org.slf4j.LoggerFactory;
       args.setSsio(ssio);
       args.setAuthorizations(authorizations);
       args.setWaitForWrites(waitForWrites);
+      args.setSamplerConfig(samplerConfig);
       args.setBatchTimeOut(batchTimeOut);
       sendBase("startMultiScan", args);
     }
 
-    public org.apache.accumulo.core.data.thrift.InitialMultiScan recv_startMultiScan() throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, org.apache.thrift.TException
+    public org.apache.accumulo.core.data.thrift.InitialMultiScan recv_startMultiScan() throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, TSampleNotPresentException, org.apache.thrift.TException
     {
       startMultiScan_result result = new startMultiScan_result();
       receiveBase(result, "startMultiScan");
@@ -323,10 +331,13 @@ import org.slf4j.LoggerFactory;
       if (result.sec != null) {
         throw result.sec;
       }
+      if (result.tsnpe != null) {
+        throw result.tsnpe;
+      }
       throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "startMultiScan failed: unknown result");
     }
 
-    public org.apache.accumulo.core.data.thrift.MultiScanResult continueMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID) throws NoSuchScanIDException, org.apache.thrift.TException
+    public org.apache.accumulo.core.data.thrift.MultiScanResult continueMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, long scanID) throws NoSuchScanIDException, TSampleNotPresentException, org.apache.thrift.TException
     {
       send_continueMultiScan(tinfo, scanID);
       return recv_continueMultiScan();
@@ -340,7 +351,7 @@ import org.slf4j.LoggerFactory;
       sendBase("continueMultiScan", args);
     }
 
-    public org.apache.accumulo.core.data.thrift.MultiScanResult recv_continueMultiScan() throws NoSuchScanIDException, org.apache.thrift.TException
+    public org.apache.accumulo.core.data.thrift.MultiScanResult recv_continueMultiScan() throws NoSuchScanIDException, TSampleNotPresentException, org.apache.thrift.TException
     {
       continueMultiScan_result result = new continueMultiScan_result();
       receiveBase(result, "continueMultiScan");
@@ -350,6 +361,9 @@ import org.slf4j.LoggerFactory;
       if (result.nssi != null) {
         throw result.nssi;
       }
+      if (result.tsnpe != null) {
+        throw result.tsnpe;
+      }
       throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "continueMultiScan failed: unknown result");
     }
 
@@ -958,9 +972,9 @@ import org.slf4j.LoggerFactory;
       super(protocolFactory, clientManager, transport);
     }
 
-    public void startScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException {
+    public void startScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, TSamplerConfiguration samplerConfig, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException {
       checkReady();
-      startScan_call method_call = new startScan_call(tinfo, credentials, extent, range, columns, batchSize, ssiList, ssio, authorizations, waitForWrites, isolated, readaheadThreshold, batchTimeOut, resultHandler, this, ___protocolFactory, ___transport);
+      startScan_call method_call = new startScan_call(tinfo, credentials, extent, range, columns, batchSize, ssiList, ssio, authorizations, waitForWrites, isolated, readaheadThreshold, samplerConfig, batchTimeOut, resultHandler, this, ___protocolFactory, ___transport);
       this.___currentMethod = method_call;
       ___manager.call(method_call);
     }
@@ -978,8 +992,9 @@ import org.slf4j.LoggerFactory;
       private boolean waitForWrites;
       private boolean isolated;
       private long readaheadThreshold;
+      private TSamplerConfiguration samplerConfig;
       private long batchTimeOut;
-      public startScan_call(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException {
+      public startScan_call(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, org.apache.accumulo.core.data.thrift.TKeyExtent extent, org.apache.accumulo.core.data.thrift.TRange range, List<org.apache.accumulo.core.data.thrift.TColumn> columns, int batchSize, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated, long readaheadThreshold, TSamplerConfiguration samplerConfig, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException {
         super(client, protocolFactory, transport, resultHandler, false);
         this.tinfo = tinfo;
         this.credentials = credentials;
@@ -993,6 +1008,7 @@ import org.slf4j.LoggerFactory;
         this.waitForWrites = waitForWrites;
         this.isolated = isolated;
         this.readaheadThreshold = readaheadThreshold;
+        this.samplerConfig = samplerConfig;
         this.batchTimeOut = batchTimeOut;
       }
 
@@ -1011,12 +1027,13 @@ import org.slf4j.LoggerFactory;
         args.setWaitForWrites(waitForWrites);
         args.setIsolated(isolated);
         args.setReadaheadThreshold(readaheadThreshold);
+        args.setSamplerConfig(samplerConfig);
         args.setBatchTimeOut(batchTimeOut);
         args.write(prot);
         prot.writeMessageEnd();
       }
 
-      public org.apache.accumulo.core.data.thrift.InitialScan getResult() throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, NotServingTabletException, TooManyFilesException, org.apache.thrift.TException {
+      public org.apache.accumulo.core.data.thrift.InitialScan getResult() throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, NotServingTabletException, TooManyFilesException, TSampleNotPresentException, org.apache.thrift.TException {
         if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) {
           throw new IllegalStateException("Method call not finished!");
         }
@@ -1051,7 +1068,7 @@ import org.slf4j.LoggerFactory;
         prot.writeMessageEnd();
       }
 
-      public org.apache.accumulo.core.data.thrift.ScanResult getResult() throws NoSuchScanIDException, NotServingTabletException, TooManyFilesException, org.apache.thrift.TException {
+      public org.apache.accumulo.core.data.thrift.ScanResult getResult() throws NoSuchScanIDException, NotServingTabletException, TooManyFilesException, TSampleNotPresentException, org.apache.thrift.TException {
         if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) {
           throw new IllegalStateException("Method call not finished!");
         }
@@ -1095,9 +1112,9 @@ import org.slf4j.LoggerFactory;
       }
     }
 
-    public void startMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException {
+    public void startMultiScan(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, TSamplerConfiguration samplerConfig, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException {
       checkReady();
-      startMultiScan_call method_call = new startMultiScan_call(tinfo, credentials, batch, columns, ssiList, ssio, authorizations, waitForWrites, batchTimeOut, resultHandler, this, ___protocolFactory, ___transport);
+      startMultiScan_call method_call = new startMultiScan_call(tinfo, credentials, batch, columns, ssiList, ssio, authorizations, waitForWrites, samplerConfig, batchTimeOut, resultHandler, this, ___protocolFactory, ___transport);
       this.___currentMethod = method_call;
       ___manager.call(method_call);
     }
@@ -1111,8 +1128,9 @@ import org.slf4j.LoggerFactory;
       private Map<String,Map<String,String>> ssio;
       private List<ByteBuffer> authorizations;
       private boolean waitForWrites;
+      private TSamplerConfiguration samplerConfig;
       private long batchTimeOut;
-      public startMultiScan_call(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException {
+      public startMultiScan_call(org.apache.accumulo.core.trace.thrift.TInfo tinfo, org.apache.accumulo.core.security.thrift.TCredentials credentials, Map<org.apache.accumulo.core.data.thrift.TKeyExtent,List<org.apache.accumulo.core.data.thrift.TRange>> batch, List<org.apache.accumulo.core.data.thrift.TColumn> columns, List<org.apache.accumulo.core.data.thrift.IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, TSamplerConfiguration samplerConfig, long batchTimeOut, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException {
         super(client, protocolFactory, transport, resultHandler, false);
         this.tinfo = tinfo;
         this.credentials = credentials;
@@ -1122,6 +1140,7 @@ import org.slf4j.LoggerFactory;
         this.ssio = ssio;
         this.authorizations = authorizations;
         this.waitForWrites = waitForWrites;
+        this.samplerConfig = samplerConfig;
         this.batchTimeOut = batchTimeOut;
       }
 
@@ -1136,12 +1155,13 @@ import org.slf4j.LoggerFactory;
         args.setSsio(ssio);
         args.setAuthorizations(authorizations);
         args.setWaitForWrites(waitForWrites);
+        args.setSamplerConfig(samplerConfig);
         args.setBatchTimeOut(batchTimeOut);
         args.write(prot);
         prot.writeMessageEnd();
       }
 
-      public org.apache.accumulo.core.data.thrift.InitialMultiScan getResult() throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, org.apache.thrift.TException {
+      public org.apache.accumulo.core.data.thrift.InitialMultiScan getResult() throws org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException, TSampleNotPresentException, org.apache.thrift.TException {
         if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) {
           throw new IllegalStateException("Method call not finished!");
         }
@@ -1176,7 +1196,7 @@ import org.slf4j.LoggerFactory;
         prot.writeMessageEnd();
       }
 
-      public org.apache.accumulo.core.data.thrift.MultiScanResult getResult() throws NoSuchScanIDException, org.apache.thrift.TException {
+      public org.apache.accumulo.core.data.thrift.MultiScanResult getResult() throws NoSuchScanIDException, TSampleNotPresentException, org.apache.thrift.TException {
         if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) {
           throw new IllegalStateException("Method call not finished!");
         }
@@ -2260,13 +2280,15 @@ import org.slf4j.LoggerFactory;
       public startScan_result getResult(I iface, startScan_args args) throws org.apache.thrift.TException {
         startScan_result result = new startScan_result();
         try {
-          result.success = iface.startScan(args.tinfo, args.credentials, args.extent, args.range, args.columns, args.batchSize, args.ssiList, args.ssio, args.authorizations, args.waitForWrites, args.isolated, args.readaheadThreshold, args.batchTimeOut);
+          result.success = iface.startScan(args.tinfo, args.credentials, args.extent, args.range, args.columns, args.batchSize, args.ssiList, args.ssio, args.authorizations, args.waitForWrites, args.isolated, args.readaheadThreshold, args.samplerConfig, args.batchTimeOut);
         } catch (org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException sec) {
           result.sec = sec;
         } catch (NotServingTabletException nste) {
           result.nste = nste;
         } catch (TooManyFilesException tmfe) {
           result.tmfe = tmfe;
+        } catch (TSampleNotPresentException tsnpe) {
+          result.tsnpe = tsnpe;
         }
         return result;
       }
@@ -2295,6 +2317,8 @@ import org.slf4j.LoggerFactory;
           result.nste = nste;
         } catch (TooManyFilesException tmfe) {
           result.tmfe = tmfe;
+        } catch (TSampleNotPresentException tsnpe) {
+          result.tsnpe = tsnpe;
         }
         return result;
       }
@@ -2335,9 +2359,11 @@ import org.slf4j.LoggerFactory;
       public startMultiScan_result getResult(I iface, startMultiScan_args args) throws org.apache.thrift.TException {
         startMultiScan_result result = new startMultiScan_result();
         try {
-          result.success = iface.startMultiScan(args.tinfo, args.credentials, args.batch, args.columns, args.ssiList, args.ssio, args.authorizations, args.waitForWrites, args.batchTimeOut);
+          result.success = iface.startMultiScan(args.tinfo, args.credentials, args.batch, args.columns, args.ssiList, args.ssio, args.authorizations, args.waitForWrites, args.samplerConfig, args.batchTimeOut);
         } catch (org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException sec) {
           result.sec = sec;
+        } catch (TSampleNotPresentException tsnpe) {
+          result.tsnpe = tsnpe;
         }
         return result;
       }
@@ -2362,6 +2388,8 @@ import org.slf4j.LoggerFactory;
           result.success = iface.continueMultiScan(args.tinfo, args.scanID);
         } catch (NoSuchScanIDException nssi) {
           result.nssi = nssi;
+        } catch (TSampleNotPresentException tsnpe) {
+          result.tsnpe = tsnpe;
         }
         return result;
       }
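
A minimal sketch, not Accumulo's actual tablet server logic, of the
implementation side feeding the processor code above: when a scan can no
longer be satisfied from sample data, the handler throws
TSampleNotPresentException, and the generated getResult() stores it in
result.tsnpe so the client receives a typed exception rather than a generic
TApplicationException. The class and the sampleStillValid() check are
hypothetical:

    import org.apache.accumulo.core.data.thrift.MultiScanResult;
    import org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException;
    import org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException;
    import org.apache.accumulo.core.trace.thrift.TInfo;

    class SamplingHandlerSketch {
      // Hypothetical helper: pretend the sample backing this scan went away.
      private boolean sampleStillValid(long scanID) {
        return false;
      }

      public MultiScanResult continueMultiScan(TInfo tinfo, long scanID)
          throws NoSuchScanIDException, TSampleNotPresentException {
        if (!sampleStillValid(scanID)) {
          // Caught by the generated getResult() above and returned to the
          // client as result.tsnpe.
          throw new TSampleNotPresentException();
        }
        return new MultiScanResult(); // placeholder result
      }
    }
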
@@ -3029,6 +3057,11 @@ import org.slf4j.LoggerFactory;
                         result.setTmfeIsSet(true);
                         msg = result;
             }
+            else             if (e instanceof TSampleNotPresentException) {
+                        result.tsnpe = (TSampleNotPresentException) e;
+                        result.setTsnpeIsSet(true);
+                        msg = result;
+            }
              else 
             {
               msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION;
@@ -3050,7 +3083,7 @@ import org.slf4j.LoggerFactory;
       }
 
       public void start(I iface, startScan_args args, org.apache.thrift.async.AsyncMethodCallback<org.apache.accumulo.core.data.thrift.InitialScan> resultHandler) throws TException {
-        iface.startScan(args.tinfo, args.credentials, args.extent, args.range, args.columns, args.batchSize, args.ssiList, args.ssio, args.authorizations, args.waitForWrites, args.isolated, args.readaheadThreshold, args.batchTimeOut,resultHandler);
+        iface.startScan(args.tinfo, args.credentials, args.extent, args.range, args.columns, args.batchSize, args.ssiList, args.ssio, args.authorizations, args.waitForWrites, args.isolated, args.readaheadThreshold, args.samplerConfig, args.batchTimeOut,resultHandler);
       }
     }
 
@@ -3096,6 +3129,11 @@ import org.slf4j.LoggerFactory;
                         result.setTmfeIsSet(true);
                         msg = result;
             }
+            else             if (e instanceof TSampleNotPresentException) {
+                        result.tsnpe = (TSampleNotPresentException) e;
+                        result.setTsnpeIsSet(true);
+                        msg = result;
+            }
              else 
             {
               msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION;
@@ -3181,6 +3219,11 @@ import org.slf4j.LoggerFactory;
                         result.setSecIsSet(true);
                         msg = result;
             }
+            else             if (e instanceof TSampleNotPresentException) {
+                        result.tsnpe = (TSampleNotPresentException) e;
+                        result.setTsnpeIsSet(true);
+                        msg = result;
+            }
              else 
             {
               msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION;
@@ -3202,7 +3245,7 @@ import org.slf4j.LoggerFactory;
       }
 
       public void start(I iface, startMultiScan_args args, org.apache.thrift.async.AsyncMethodCallback<org.apache.accumulo.core.data.thrift.InitialMultiScan> resultHandler) throws TException {
-        iface.startMultiScan(args.tinfo, args.credentials, args.batch, args.columns, args.ssiList, args.ssio, args.authorizations, args.waitForWrites, args.batchTimeOut,resultHandler);
+        iface.startMultiScan(args.tinfo, args.credentials, args.batch, args.columns, args.ssiList, args.ssio, args.authorizations, args.waitForWrites, args.samplerConfig, args.batchTimeOut,resultHandler);
       }
     }
 
@@ -3238,6 +3281,11 @@ import org.slf4j.LoggerFactory;
                         result.setNssiIsSet(true);
                         msg = result;
             }
+            else             if (e instanceof TSampleNotPresentException) {
+                        result.tsnpe = (TSampleNotPresentException) e;
+                        result.setTsnpeIsSet(true);
+                        msg = result;
+            }
              else 
             {
               msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION;
@@ -4471,7 +4519,8 @@ import org.slf4j.LoggerFactory;
     private static final org.apache.thrift.protocol.TField WAIT_FOR_WRITES_FIELD_DESC = new org.apache.thrift.protocol.TField("waitForWrites", org.apache.thrift.protocol.TType.BOOL, (short)9);
     private static final org.apache.thrift.protocol.TField ISOLATED_FIELD_DESC = new org.apache.thrift.protocol.TField("isolated", org.apache.thrift.protocol.TType.BOOL, (short)10);
     private static final org.apache.thrift.protocol.TField READAHEAD_THRESHOLD_FIELD_DESC = new org.apache.thrift.protocol.TField("readaheadThreshold", org.apache.thrift.protocol.TType.I64, (short)12);
-    private static final org.apache.thrift.protocol.TField BATCH_TIME_OUT_FIELD_DESC = new org.apache.thrift.protocol.TField("batchTimeOut", org.apache.thrift.protocol.TType.I64, (short)13);
+    private static final org.apache.thrift.protocol.TField SAMPLER_CONFIG_FIELD_DESC = new org.apache.thrift.protocol.TField("samplerConfig", org.apache.thrift.protocol.TType.STRUCT, (short)13);
+    private static final org.apache.thrift.protocol.TField BATCH_TIME_OUT_FIELD_DESC = new org.apache.thrift.protocol.TField("batchTimeOut", org.apache.thrift.protocol.TType.I64, (short)14);
 
     private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
     static {
@@ -4491,6 +4540,7 @@ import org.slf4j.LoggerFactory;
     public boolean waitForWrites; // required
     public boolean isolated; // required
     public long readaheadThreshold; // required
+    public TSamplerConfiguration samplerConfig; // required
     public long batchTimeOut; // required
 
     /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
@@ -4507,7 +4557,8 @@ import org.slf4j.LoggerFactory;
       WAIT_FOR_WRITES((short)9, "waitForWrites"),
       ISOLATED((short)10, "isolated"),
       READAHEAD_THRESHOLD((short)12, "readaheadThreshold"),
-      BATCH_TIME_OUT((short)13, "batchTimeOut");
+      SAMPLER_CONFIG((short)13, "samplerConfig"),
+      BATCH_TIME_OUT((short)14, "batchTimeOut");
 
       private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -4546,7 +4597,9 @@ import org.slf4j.LoggerFactory;
             return ISOLATED;
           case 12: // READAHEAD_THRESHOLD
             return READAHEAD_THRESHOLD;
-          case 13: // BATCH_TIME_OUT
+          case 13: // SAMPLER_CONFIG
+            return SAMPLER_CONFIG;
+          case 14: // BATCH_TIME_OUT
             return BATCH_TIME_OUT;
           default:
             return null;
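
One consequence of the renumbering visible above: thrift field id 13 now
resolves to SAMPLER_CONFIG, and batchTimeOut has moved to id 14. Because the
generated readers skip a field whose wire type doesn't match (samplerConfig
is a STRUCT, batchTimeOut an I64; see the skip branches later in this diff),
a batchTimeOut sent under the old id 13 would be ignored rather than misread.
A tiny check, with hypothetical class name:

    import org.apache.accumulo.core.tabletserver.thrift.TabletClientService.startScan_args;

    class FieldIdCheck {
      public static void main(String[] args) {
        System.out.println(startScan_args._Fields.findByThriftId(13)); // SAMPLER_CONFIG
        System.out.println(startScan_args._Fields.findByThriftId(14)); // BATCH_TIME_OUT
      }
    }
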
@@ -4628,6 +4681,8 @@ import org.slf4j.LoggerFactory;
           new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL)));
       tmpMap.put(_Fields.READAHEAD_THRESHOLD, new org.apache.thrift.meta_data.FieldMetaData("readaheadThreshold", org.apache.thrift.TFieldRequirementType.DEFAULT, 
           new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
+      tmpMap.put(_Fields.SAMPLER_CONFIG, new org.apache.thrift.meta_data.FieldMetaData("samplerConfig", org.apache.thrift.TFieldRequirementType.DEFAULT, 
+          new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSamplerConfiguration.class)));
       tmpMap.put(_Fields.BATCH_TIME_OUT, new org.apache.thrift.meta_data.FieldMetaData("batchTimeOut", org.apache.thrift.TFieldRequirementType.DEFAULT, 
           new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)));
       metaDataMap = Collections.unmodifiableMap(tmpMap);
@@ -4650,6 +4705,7 @@ import org.slf4j.LoggerFactory;
       boolean waitForWrites,
       boolean isolated,
       long readaheadThreshold,
+      TSamplerConfiguration samplerConfig,
       long batchTimeOut)
     {
       this();
@@ -4669,6 +4725,7 @@ import org.slf4j.LoggerFactory;
       setIsolatedIsSet(true);
       this.readaheadThreshold = readaheadThreshold;
       setReadaheadThresholdIsSet(true);
+      this.samplerConfig = samplerConfig;
       this.batchTimeOut = batchTimeOut;
       setBatchTimeOutIsSet(true);
     }
@@ -4727,6 +4784,9 @@ import org.slf4j.LoggerFactory;
       this.waitForWrites = other.waitForWrites;
       this.isolated = other.isolated;
       this.readaheadThreshold = other.readaheadThreshold;
+      if (other.isSetSamplerConfig()) {
+        this.samplerConfig = new TSamplerConfiguration(other.samplerConfig);
+      }
       this.batchTimeOut = other.batchTimeOut;
     }
 
@@ -4752,6 +4812,7 @@ import org.slf4j.LoggerFactory;
       this.isolated = false;
       setReadaheadThresholdIsSet(false);
       this.readaheadThreshold = 0;
+      this.samplerConfig = null;
       setBatchTimeOutIsSet(false);
       this.batchTimeOut = 0;
     }
@@ -5096,6 +5157,30 @@ import org.slf4j.LoggerFactory;
       __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __READAHEADTHRESHOLD_ISSET_ID, value);
     }
 
+    public TSamplerConfiguration getSamplerConfig() {
+      return this.samplerConfig;
+    }
+
+    public startScan_args setSamplerConfig(TSamplerConfiguration samplerConfig) {
+      this.samplerConfig = samplerConfig;
+      return this;
+    }
+
+    public void unsetSamplerConfig() {
+      this.samplerConfig = null;
+    }
+
+    /** Returns true if field samplerConfig is set (has been assigned a value) and false otherwise */
+    public boolean isSetSamplerConfig() {
+      return this.samplerConfig != null;
+    }
+
+    public void setSamplerConfigIsSet(boolean value) {
+      if (!value) {
+        this.samplerConfig = null;
+      }
+    }
+
     public long getBatchTimeOut() {
       return this.batchTimeOut;
     }
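
The new accessors above follow the generated-thrift convention for
struct-typed fields: null doubles as "not set", and setSamplerConfigIsSet(false)
nulls the field. A small sketch of the resulting behavior (class name
hypothetical):

    import org.apache.accumulo.core.tabletserver.thrift.TSamplerConfiguration;
    import org.apache.accumulo.core.tabletserver.thrift.TabletClientService.startScan_args;

    class SamplerConfigIsSetSketch {
      public static void main(String[] args) {
        startScan_args a = new startScan_args();
        System.out.println(a.isSetSamplerConfig()); // false: null means unset
        a.setSamplerConfig(new TSamplerConfiguration());
        System.out.println(a.isSetSamplerConfig()); // true
        a.setSamplerConfigIsSet(false);             // clears the field back to null
        System.out.println(a.getSamplerConfig());   // null
      }
    }

Since the field is unset when null, the schemes later in this diff only
serialize samplerConfig when present, so scans that don't request sampling
add nothing to the message.
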
@@ -5217,6 +5302,14 @@ import org.slf4j.LoggerFactory;
         }
         break;
 
+      case SAMPLER_CONFIG:
+        if (value == null) {
+          unsetSamplerConfig();
+        } else {
+          setSamplerConfig((TSamplerConfiguration)value);
+        }
+        break;
+
       case BATCH_TIME_OUT:
         if (value == null) {
           unsetBatchTimeOut();
@@ -5266,6 +5359,9 @@ import org.slf4j.LoggerFactory;
       case READAHEAD_THRESHOLD:
         return Long.valueOf(getReadaheadThreshold());
 
+      case SAMPLER_CONFIG:
+        return getSamplerConfig();
+
       case BATCH_TIME_OUT:
         return Long.valueOf(getBatchTimeOut());
 
@@ -5304,6 +5400,8 @@ import org.slf4j.LoggerFactory;
         return isSetIsolated();
       case READAHEAD_THRESHOLD:
         return isSetReadaheadThreshold();
+      case SAMPLER_CONFIG:
+        return isSetSamplerConfig();
       case BATCH_TIME_OUT:
         return isSetBatchTimeOut();
       }
@@ -5431,6 +5529,15 @@ import org.slf4j.LoggerFactory;
           return false;
       }
 
+      boolean this_present_samplerConfig = true && this.isSetSamplerConfig();
+      boolean that_present_samplerConfig = true && that.isSetSamplerConfig();
+      if (this_present_samplerConfig || that_present_samplerConfig) {
+        if (!(this_present_samplerConfig && that_present_samplerConfig))
+          return false;
+        if (!this.samplerConfig.equals(that.samplerConfig))
+          return false;
+      }
+
       boolean this_present_batchTimeOut = true;
       boolean that_present_batchTimeOut = true;
       if (this_present_batchTimeOut || that_present_batchTimeOut) {
@@ -5576,6 +5683,16 @@ import org.slf4j.LoggerFactory;
           return lastComparison;
         }
       }
+      lastComparison = Boolean.valueOf(isSetSamplerConfig()).compareTo(other.isSetSamplerConfig());
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+      if (isSetSamplerConfig()) {
+        lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.samplerConfig, other.samplerConfig);
+        if (lastComparison != 0) {
+          return lastComparison;
+        }
+      }
       lastComparison = Boolean.valueOf(isSetBatchTimeOut()).compareTo(other.isSetBatchTimeOut());
       if (lastComparison != 0) {
         return lastComparison;
@@ -5686,6 +5803,14 @@ import org.slf4j.LoggerFactory;
       sb.append(this.readaheadThreshold);
       first = false;
       if (!first) sb.append(", ");
+      sb.append("samplerConfig:");
+      if (this.samplerConfig == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.samplerConfig);
+      }
+      first = false;
+      if (!first) sb.append(", ");
       sb.append("batchTimeOut:");
       sb.append(this.batchTimeOut);
       first = false;
@@ -5708,6 +5833,9 @@ import org.slf4j.LoggerFactory;
       if (range != null) {
         range.validate();
       }
+      if (samplerConfig != null) {
+        samplerConfig.validate();
+      }
     }
 
     private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException {
@@ -5785,14 +5913,14 @@ import org.slf4j.LoggerFactory;
             case 4: // COLUMNS
               if (schemeField.type == org.apache.thrift.protocol.TType.LIST) {
                 {
-                  org.apache.thrift.protocol.TList _list106 = iprot.readListBegin();
-                  struct.columns = new ArrayList<org.apache.accumulo.core.data.thrift.TColumn>(_list106.size);
-                  for (int _i107 = 0; _i107 < _list106.size; ++_i107)
+                  org.apache.thrift.protocol.TList _list116 = iprot.readListBegin();
+                  struct.columns = new ArrayList<org.apache.accumulo.core.data.thrift.TColumn>(_list116.size);
+                  for (int _i117 = 0; _i117 < _list116.size; ++_i117)
                   {
-                    org.apache.accumulo.core.data.thrift.TColumn _elem108;
-                    _elem108 = new org.apache.accumulo.core.data.thrift.TColumn();
-                    _elem108.read(iprot);
-                    struct.columns.add(_elem108);
+                    org.apache.accumulo.core.data.thrift.TColumn _elem118;
+                    _elem118 = new org.apache.accumulo.core.data.thrift.TColumn();
+                    _elem118.read(iprot);
+                    struct.columns.add(_elem118);
                   }
                   iprot.readListEnd();
                 }
@@ -5812,14 +5940,14 @@ import org.slf4j.LoggerFactory;
             case 6: // SSI_LIST
               if (schemeField.type == org.apache.thrift.protocol.TType.LIST) {
                 {
-                  org.apache.thrift.protocol.TList _list109 = iprot.readListBegin();
-                  struct.ssiList = new ArrayList<org.apache.accumulo.core.data.thrift.IterInfo>(_list109.size);
-                  for (int _i110 = 0; _i110 < _list109.size; ++_i110)
+                  org.apache.thrift.protocol.TList _list119 = iprot.readListBegin();
+                  struct.ssiList = new ArrayList<org.apache.accumulo.core.data.thrift.IterInfo>(_list119.size);
+                  for (int _i120 = 0; _i120 < _list119.size; ++_i120)
                   {
-                    org.apache.accumulo.core.data.thrift.IterInfo _elem111;
-                    _elem111 = new org.apache.accumulo.core.data.thrift.IterInfo();
-                    _elem111.read(iprot);
-                    struct.ssiList.add(_elem111);
+                    org.apache.accumulo.core.data.thrift.IterInfo _elem121;
+                    _elem121 = new org.apache.accumulo.core.data.thrift.IterInfo();
+                    _elem121.read(iprot);
+                    struct.ssiList.add(_elem121);
                   }
                   iprot.readListEnd();
                 }
@@ -5831,27 +5959,27 @@ import org.slf4j.LoggerFactory;
             case 7: // SSIO
               if (schemeField.type == org.apache.thrift.protocol.TType.MAP) {
                 {
-                  org.apache.thrift.protocol.TMap _map112 = iprot.readMapBegin();
-                  struct.ssio = new HashMap<String,Map<String,String>>(2*_map112.size);
-                  for (int _i113 = 0; _i113 < _map112.size; ++_i113)
+                  org.apache.thrift.protocol.TMap _map122 = iprot.readMapBegin();
+                  struct.ssio = new HashMap<String,Map<String,String>>(2*_map122.size);
+                  for (int _i123 = 0; _i123 < _map122.size; ++_i123)
                   {
-                    String _key114;
-                    Map<String,String> _val115;
-                    _key114 = iprot.readString();
+                    String _key124;
+                    Map<String,String> _val125;
+                    _key124 = iprot.readString();
                     {
-                      org.apache.thrift.protocol.TMap _map116 = iprot.readMapBegin();
-                      _val115 = new HashMap<String,String>(2*_map116.size);
-                      for (int _i117 = 0; _i117 < _map116.size; ++_i117)
+                      org.apache.thrift.protocol.TMap _map126 = iprot.readMapBegin();
+                      _val125 = new HashMap<String,String>(2*_map126.size);
+                      for (int _i127 = 0; _i127 < _map126.size; ++_i127)
                       {
-                        String _key118;
-                        String _val119;
-                        _key118 = iprot.readString();
-                        _val119 = iprot.readString();
-                        _val115.put(_key118, _val119);
+                        String _key128;
+                        String _val129;
+                        _key128 = iprot.readString();
+                        _val129 = iprot.readString();
+                        _val125.put(_key128, _val129);
                       }
                       iprot.readMapEnd();
                     }
-                    struct.ssio.put(_key114, _val115);
+                    struct.ssio.put(_key124, _val125);
                   }
                   iprot.readMapEnd();
                 }
@@ -5863,13 +5991,13 @@ import org.slf4j.LoggerFactory;
             case 8: // AUTHORIZATIONS
               if (schemeField.type == org.apache.thrift.protocol.TType.LIST) {
                 {
-                  org.apache.thrift.protocol.TList _list120 = iprot.readListBegin();
-                  struct.authorizations = new ArrayList<ByteBuffer>(_list120.size);
-                  for (int _i121 = 0; _i121 < _list120.size; ++_i121)
+                  org.apache.thrift.protocol.TList _list130 = iprot.readListBegin();
+                  struct.authorizations = new ArrayList<ByteBuffer>(_list130.size);
+                  for (int _i131 = 0; _i131 < _list130.size; ++_i131)
                   {
-                    ByteBuffer _elem122;
-                    _elem122 = iprot.readBinary();
-                    struct.authorizations.add(_elem122);
+                    ByteBuffer _elem132;
+                    _elem132 = iprot.readBinary();
+                    struct.authorizations.add(_elem132);
                   }
                   iprot.readListEnd();
                 }
@@ -5902,7 +6030,16 @@ import org.slf4j.LoggerFactory;
                 org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
               }
               break;
-            case 13: // BATCH_TIME_OUT
+            case 13: // SAMPLER_CONFIG
+              if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) {
+                struct.samplerConfig = new TSamplerConfiguration();
+                struct.samplerConfig.read(iprot);
+                struct.setSamplerConfigIsSet(true);
+              } else { 
+                org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+              }
+              break;
+            case 14: // BATCH_TIME_OUT
               if (schemeField.type == org.apache.thrift.protocol.TType.I64) {
                 struct.batchTimeOut = iprot.readI64();
                 struct.setBatchTimeOutIsSet(true);
@@ -5944,9 +6081,9 @@ import org.slf4j.LoggerFactory;
           oprot.writeFieldBegin(COLUMNS_FIELD_DESC);
           {
             oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.columns.size()));
-            for (org.apache.accumulo.core.data.thrift.TColumn _iter123 : struct.columns)
+            for (org.apache.accumulo.core.data.thrift.TColumn _iter133 : struct.columns)
             {
-              _iter123.write(oprot);
+              _iter133.write(oprot);
             }
             oprot.writeListEnd();
           }
@@ -5959,9 +6096,9 @@ import org.slf4j.LoggerFactory;
           oprot.writeFieldBegin(SSI_LIST_FIELD_DESC);
           {
             oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.ssiList.size()));
-            for (org.apache.accumulo.core.data.thrift.IterInfo _iter124 : struct.ssiList)
+            for (org.apache.accumulo.core.data.thrift.IterInfo _iter134 : struct.ssiList)
             {
-              _iter124.write(oprot);
+              _iter134.write(oprot);
             }
             oprot.writeListEnd();
           }
@@ -5971,15 +6108,15 @@ import org.slf4j.LoggerFactory;
           oprot.writeFieldBegin(SSIO_FIELD_DESC);
           {
             oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.MAP, struct.ssio.size()));
-            for (Map.Entry<String, Map<String,String>> _iter125 : struct.ssio.entrySet())
+            for (Map.Entry<String, Map<String,String>> _iter135 : struct.ssio.entrySet())
             {
-              oprot.writeString(_iter125.getKey());
+              oprot.writeString(_iter135.getKey());
               {
-                oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, _iter125.getValue().size()));
-                for (Map.Entry<String, String> _iter126 : _iter125.getValue().entrySet())
+                oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, _iter135.getValue().size()));
+                for (Map.Entry<String, String> _iter136 : _iter135.getValue().entrySet())
                 {
-                  oprot.writeString(_iter126.getKey());
-                  oprot.writeString(_iter126.getValue());
+                  oprot.writeString(_iter136.getKey());
+                  oprot.writeString(_iter136.getValue());
                 }
                 oprot.writeMapEnd();
               }
@@ -5992,9 +6129,9 @@ import org.slf4j.LoggerFactory;
           oprot.writeFieldBegin(AUTHORIZATIONS_FIELD_DESC);
           {
             oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.authorizations.size()));
-            for (ByteBuffer _iter127 : struct.authorizations)
+            for (ByteBuffer _iter137 : struct.authorizations)
             {
-              oprot.writeBinary(_iter127);
+              oprot.writeBinary(_iter137);
             }
             oprot.writeListEnd();
           }
@@ -6014,6 +6151,11 @@ import org.slf4j.LoggerFactory;
         oprot.writeFieldBegin(READAHEAD_THRESHOLD_FIELD_DESC);
         oprot.writeI64(struct.readaheadThreshold);
         oprot.writeFieldEnd();
+        if (struct.samplerConfig != null) {
+          oprot.writeFieldBegin(SAMPLER_CONFIG_FIELD_DESC);
+          struct.samplerConfig.write(oprot);
+          oprot.writeFieldEnd();
+        }
         oprot.writeFieldBegin(BATCH_TIME_OUT_FIELD_DESC);
         oprot.writeI64(struct.batchTimeOut);
         oprot.writeFieldEnd();
@@ -6071,10 +6213,13 @@ import org.slf4j.LoggerFactory;
         if (struct.isSetReadaheadThreshold()) {
           optionals.set(11);
         }
-        if (struct.isSetBatchTimeOut()) {
+        if (struct.isSetSamplerConfig()) {
           optionals.set(12);
         }
-        oprot.writeBitSet(optionals, 13);
+        if (struct.isSetBatchTimeOut()) {
+          optionals.set(13);
+        }
+        oprot.writeBitSet(optionals, 14);
         if (struct.isSetTinfo()) {
           struct.tinfo.write(oprot);
         }
@@ -6090,9 +6235,9 @@ import org.slf4j.LoggerFactory;
         if (struct.isSetColumns()) {
           {
             oprot.writeI32(struct.columns.size());
-            for (org.apache.accumulo.core.data.thrift.TColumn _iter128 : struct.columns)
+            for (org.apache.accumulo.core.data.thrift.TColumn _iter138 : struct.columns)
             {
-              _iter128.write(oprot);
+              _iter138.write(oprot);
             }
           }
         }
@@ -6102,24 +6247,24 @@ import org.slf4j.LoggerFactory;
         if (struct.isSetSsiList()) {
           {
             oprot.writeI32(struct.ssiList.size());
-            for (org.apache.accumulo.core.data.thrift.IterInfo _iter129 : struct.ssiList)
+            for (org.apache.accumulo.core.data.thrift.IterInfo _iter139 : struct.ssiList)
             {
-              _iter129.write(oprot);
+              _iter139.write(oprot);
             }
           }
         }
         if (struct.isSetSsio()) {
           {
             oprot.writeI32(struct.ssio.size());
-            for (Map.Entry<String, Map<String,String>> _iter130 : struct.ssio.entrySet())
+            for (Map.Entry<String, Map<String,String>> _iter140 : struct.ssio.entrySet())
             {
-              oprot.writeString(_iter130.getKey());
+              oprot.writeString(_iter140.getKey());
               {
-                oprot.writeI32(_iter130.getValue().size());
-                for (Map.Entry<String, String> _iter131 : _iter130.getValue().entrySet())
+                oprot.writeI32(_iter140.getValue().size());
+                for (Map.Entry<String, String> _iter141 : _iter140.getValue().entrySet())
                 {
-                  oprot.writeString(_iter131.getKey());
-                  oprot.writeString(_iter131.getValue());
+                  oprot.writeString(_iter141.getKey());
+                  oprot.writeString(_iter141.getValue());
                 }
               }
             }
@@ -6128,9 +6273,9 @@ import org.slf4j.LoggerFactory;
         if (struct.isSetAuthorizations()) {
           {
             oprot.writeI32(struct.authorizations.size());
-            for (ByteBuffer _iter132 : struct.authorizations)
+            for (ByteBuffer _iter142 : struct.authorizations)
             {
-              oprot.writeBinary(_iter132);
+              oprot.writeBinary(_iter142);
             }
           }
         }
@@ -6143,6 +6288,9 @@ import org.slf4j.LoggerFactory;
         if (struct.isSetReadaheadThreshold()) {
           oprot.writeI64(struct.readaheadThreshold);
         }
+        if (struct.isSetSamplerConfig()) {
+          struct.samplerConfig.write(oprot);
+        }
         if (struct.isSetBatchTimeOut()) {
           oprot.writeI64(struct.batchTimeOut);
         }
@@ -6151,7 +6299,7 @@ import org.slf4j.LoggerFactory;
       @Override
       public void read(org.apache.thrift.protocol.TProtocol prot, startScan_args struct) throws org.apache.thrift.TException {
         TTupleProtocol iprot = (TTupleProtocol) prot;
-        BitSet incoming = iprot.readBitSet(13);
+        BitSet incoming = iprot.readBitSet(14);
         if (incoming.get(0)) {
           struct.tinfo = new org.apache.accumulo.core.trace.thrift.TInfo();
           struct.tinfo.read(iprot);
@@ -6174,14 +6322,14 @@ import org.slf4j.LoggerFactory;
         }
         if (incoming.get(4)) {
           {
-            org.apache.thrift.protocol.TList _list133 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32());
-            struct.columns = new ArrayList<org.apache.accumulo.core.data.thrift.TColumn>(_list133.size);
-            for (int _i134 = 0; _i134 < _list133.size; ++_i134)
+            org.apache.thrift.protocol.TList _list143 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32());
+            struct.columns = new ArrayList<org.apache.accumulo.core.data.thrift.TColumn>(_list143.size);
+            for (int _i144 = 0; _i144 < _list143.size; ++_i144)
             {
-              org.apache.accumulo.core.data.thrift.TColumn _elem135;
-              _elem135 = new org.apache.accumulo.core.data.thrift.TColumn();
-              _elem135.read(iprot);
-              struct.columns.add(_elem135);
+              org.apache.accumulo.core.data.thrift.TColumn _elem145;
+              _elem145 = new org.apache.accumulo.core.data.thrift.TColumn();
+              _elem145.read(iprot);
+              struct.columns.add(_elem145);
             }
           }
           struct.setColumnsIsSet(true);
@@ -6192,53 +6340,53 @@ import org.slf4j.LoggerFactory;
         }
         if (incoming.get(6)) {
           {
-            org.apache.thrift.protocol.TList _list136 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32());
-            struct.ssiList = new ArrayList<org.apache.accumulo.core.data.thrift.IterInfo>(_list136.size);
-            for (int _i137 = 0; _i137 < _list136.size; ++_i137)
+            org.apache.thrift.protocol.TList _list146 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32());
+            struct.ssiList = new ArrayList<org.apache.accumulo.core.data.thrift.IterInfo>(_list146.size);
+            for (int _i147 = 0; _i147 < _list146.size; ++_i147)
             {
-              org.apache.accumulo.core.data.thrift.IterInfo _elem138;
-              _elem138 = new org.apache.accumulo.core.data.thrift.IterInfo();
-              _elem138.read(iprot);
-              struct.ssiList.add(_elem138);
+              org.apache.accumulo.core.data.thrift.IterInfo _elem148;
+              _elem148 = new org.apache.accumulo.core.data.thrift.IterInfo();
+              _elem148.read(iprot);
+              struct.ssiList.add(_elem148);
             }
           }
           struct.setSsiListIsSet(true);
         }
         if (incoming.get(7)) {
           {
-            org.apache.thrift.protocol.TMap _map139 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.MAP, iprot.readI32());
-            struct.ssio = new HashMap<String,Map<String,String>>(2*_map139.size);
-            for (int _i140 = 0; _i140 < _map139.size; ++_i140)
+            org.apache.thrift.protocol.TMap _map149 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.MAP, iprot.readI32());
+            struct.ssio = new HashMap<String,Map<String,String>>(2*_map149.size);
+            for (int _i150 = 0; _i150 < _map149.size; ++_i150)
             {
-              String _key141;
-              Map<String,String> _val142;
-              _key141 = iprot.readString();
+              String _key151;
+              Map<String,String> _val152;
+              _key151 = iprot.readString();
               {
-                org.apache.thrift.protocol.TMap _map143 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32());
-                _val142 = new HashMap<String,String>(2*_map143.size);
-                for (int _i144 = 0; _i144 < _map143.size; ++_i144)
+                org.apache.thrift.protocol.TMap _map153 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32());
+                _val152 = new HashMap<String,String>(2*_map153.size);
+                for (int _i154 = 0; _i154 < _map153.size; ++_i154)
                 {
-                  String _key145;
-                  String _val146;
-                  _key145 = iprot.readString();
-                  _val146 = iprot.readString();
-                  _val142.put(_key145, _val146);
+                  String _key155;
+                  String _val156;
+                  _key155 = iprot.readString();
+                  _val156 = iprot.readString();
+                  _val152.put(_key155, _val156);
                 }
               }
-              struct.ssio.put(_key141, _val142);
+              struct.ssio.put(_key151, _val152);
             }
           }
           struct.setSsioIsSet(true);
         }
         if (incoming.get(8)) {
           {
-            org.apache.thrift.protocol.TList _list147 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32());
-            struct.authorizations = new ArrayList<ByteBuffer>(_list147.size);
-            for (int _i148 = 0; _i148 < _list147.size; ++_i148)
+            org.apache.thrift.protocol.TList _list157 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32());
+            struct.authorizations = new ArrayList<ByteBuffer>(_list157.size);
+            for (int _i158 = 0; _i158 < _list157.size; ++_i158)
             {
-              ByteBuffer _elem149;
-              _elem149 = iprot.readBinary();
-              struct.authorizations.add(_elem149);
+              ByteBuffer _elem159;
+              _elem159 = iprot.readBinary();
+              struct.authorizations.add(_elem159);
             }
           }
           struct.setAuthorizationsIsSet(true);
@@ -6256,6 +6404,11 @@ import org.slf4j.LoggerFactory;
           struct.setReadaheadThresholdIsSet(true);
         }
         if (incoming.get(12)) {
+          struct.samplerConfig = new TSamplerConfiguration();
+          struct.samplerConfig.read(iprot);
+          struct.setSamplerConfigIsSet(true);
+        }
+        if (incoming.get(13)) {
           struct.batchTimeOut = iprot.readI64();
           struct.setBatchTimeOutIsSet(true);
         }
@@ -6271,6 +6424,7 @@ import org.slf4j.LoggerFactory;
     private static final org.apache.thrift.protocol.TField SEC_FIELD_DESC = new org.apache.thrift.protocol.TField("sec", org.apache.thrift.protocol.TType.STRUCT, (short)1);
     private static final org.apache.thrift.protocol.TField NSTE_FIELD_DESC = new org.apache.thrift.protocol.TField("nste", org.apache.thrift.protocol.TType.STRUCT, (short)2);
     private static final org.apache.thrift.protocol.TField TMFE_FIELD_DESC = new org.apache.thrift.protocol.TField("tmfe", org.apache.thrift.protocol.TType.STRUCT, (short)3);
+    private static final org.apache.thrift.protocol.TField TSNPE_FIELD_DESC = new org.apache.thrift.protocol.TField("tsnpe", org.apache.thrift.protocol.TType.STRUCT, (short)4);
 
     private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
     static {
@@ -6282,13 +6436,15 @@ import org.slf4j.LoggerFactory;
     public org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException sec; // required
     public NotServingTabletException nste; // required
     public TooManyFilesException tmfe; // required
+    public TSampleNotPresentException tsnpe; // required
 
     /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
     public enum _Fields implements org.apache.thrift.TFieldIdEnum {
       SUCCESS((short)0, "success"),
       SEC((short)1, "sec"),
       NSTE((short)2, "nste"),
-      TMFE((short)3, "tmfe");
+      TMFE((short)3, "tmfe"),
+      TSNPE((short)4, "tsnpe");
 
       private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -6311,6 +6467,8 @@ import org.slf4j.LoggerFactory;
             return NSTE;
           case 3: // TMFE
             return TMFE;
+          case 4: // TSNPE
+            return TSNPE;
           default:
             return null;
         }
@@ -6362,6 +6520,8 @@ import org.slf4j.LoggerFactory;
           new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT)));
       tmpMap.put(_Fields.TMFE, new org.apache.thrift.meta_data.FieldMetaData("tmfe", org.apache.thrift.TFieldRequirementType.DEFAULT, 
           new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT)));
+      tmpMap.put(_Fields.TSNPE, new org.apache.thrift.meta_data.FieldMetaData("tsnpe", org.apache.thrift.TFieldRequirementType.DEFAULT, 
+          new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT)));
       metaDataMap = Collections.unmodifiableMap(tmpMap);
       org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(startScan_result.class, metaDataMap);
     }
@@ -6373,13 +6533,15 @@ import org.slf4j.LoggerFactory;
       org.apache.accumulo.core.data.thrift.InitialScan success,
       org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException sec,
       NotServingTabletException nste,
-      TooManyFilesException tmfe)
+      TooManyFilesException tmfe,
+      TSampleNotPresentException tsnpe)
     {
       this();
       this.success = success;
       this.sec = sec;
       this.nste = nste;
       this.tmfe = tmfe;
+      this.tsnpe = tsnpe;
     }
 
     /**
@@ -6398,6 +6560,9 @@ import org.slf4j.LoggerFactory;
       if (other.isSetTmfe()) {
         this.tmfe = new TooManyFilesException(other.tmfe);
       }
+      if (other.isSetTsnpe()) {
+        this.tsnpe = new TSampleNotPresentException(other.tsnpe);
+      }
     }
 
     public startScan_result deepCopy() {
@@ -6410,6 +6575,7 @@ import org.slf4j.LoggerFactory;
       this.sec = null;
       this.nste = null;
       this.tmfe = null;
+      this.tsnpe = null;
     }
 
     public org.apache.accumulo.core.data.thrift.InitialScan getSuccess() {
@@ -6508,6 +6674,30 @@ import org.slf4j.LoggerFactory;
       }
     }
 
+    public TSampleNotPresentException getTsnpe() {
+      return this.tsnpe;
+    }
+
+    public startScan_result setTsnpe(TSampleNotPresentException tsnpe) {
+      this.tsnpe = tsnpe;
+      return this;
+    }
+
+    public void unsetTsnpe() {
+      this.tsnpe = null;
+    }
+
+    /** Returns true if field tsnpe is set (has been assigned a value) and false otherwise */
+    public boolean isSetTsnpe() {
+      return this.tsnpe != null;
+    }
+
+    public void setTsnpeIsSet(boolean value) {
+      if (!value) {
+        this.tsnpe = null;
+      }
+    }
+
     public void setFieldValue(_Fields field, Object value) {
       switch (field) {
       case SUCCESS:
@@ -6542,6 +6732,14 @@ import org.slf4j.LoggerFactory;
         }
         break;
 
+      case TSNPE:
+        if (value == null) {
+          unsetTsnpe();
+        } else {
+          setTsnpe((TSampleNotPresentException)value);
+        }
+        break;
+
       }
     }
 
@@ -6559,6 +6757,9 @@ import org.slf4j.LoggerFactory;
       case TMFE:
         return getTmfe();
 
+      case TSNPE:
+        return getTsnpe();
+
       }
       throw new IllegalStateException();
     }
@@ -6578,6 +6779,8 @@ import org.slf4j.LoggerFactory;
         return isSetNste();
       case TMFE:
         return isSetTmfe();
+      case TSNPE:
+        return isSetTsnpe();
       }
       throw new IllegalStateException();
     }
@@ -6631,6 +6834,15 @@ import org.slf4j.LoggerFactory;
           return false;
       }
 
+      boolean this_present_tsnpe = true && this.isSetTsnpe();
+      boolean that_present_tsnpe = true && that.isSetTsnpe();
+      if (this_present_tsnpe || that_present_tsnpe) {
+        if (!(this_present_tsnpe && that_present_tsnpe))
+          return false;
+        if (!this.tsnpe.equals(that.tsnpe))
+          return false;
+      }
+
       return true;
     }
 
@@ -6687,6 +6899,16 @@ import org.slf4j.LoggerFactory;
           return lastComparison;
         }
       }
+      lastComparison = Boolean.valueOf(isSetTsnpe()).compareTo(other.isSetTsnpe());
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+      if (isSetTsnpe()) {
+        lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tsnpe, other.tsnpe);
+        if (lastComparison != 0) {
+          return lastComparison;
+        }
+      }
       return 0;
     }
 
@@ -6738,6 +6960,14 @@ import org.slf4j.LoggerFactory;
         sb.append(this.tmfe);
       }
       first = false;
+      if (!first) sb.append(", ");
+      sb.append("tsnpe:");
+      if (this.tsnpe == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.tsnpe);
+      }
+      first = false;
       sb.append(")");
       return sb.toString();
     }
@@ -6820,6 +7050,15 @@ import org.slf4j.LoggerFactory;
                 org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
               }
               break;
+            case 4: // TSNPE
+              if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) {
+                struct.tsnpe = new TSampleNotPresentException();
+                struct.tsnpe.read(iprot);
+                struct.setTsnpeIsSet(true);
+              } else { 
+                org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+              }
+              break;
             default:
               org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
           }
@@ -6855,6 +7094,11 @@ import org.slf4j.LoggerFactory;
           struct.tmfe.write(oprot);
           oprot.writeFieldEnd();
         }
+        if (struct.tsnpe != null) {
+          oprot.writeFieldBegin(TSNPE_FIELD_DESC);
+          struct.tsnpe.write(oprot);
+          oprot.writeFieldEnd();
+        }
         oprot.writeFieldStop();
         oprot.writeStructEnd();
       }
@@ -6885,7 +7129,10 @@ import org.slf4j.LoggerFactory;
         if (struct.isSetTmfe()) {
           optionals.set(3);
         }
-        oprot.writeBitSet(optionals, 4);
+        if (struct.isSetTsnpe()) {
+          optionals.set(4);
+        }
+        oprot.writeBitSet(optionals, 5);
         if (struct.isSetSuccess()) {
           struct.success.write(oprot);
         }
@@ -6898,12 +7145,15 @@ import org.slf4j.LoggerFactory;
         if (struct.isSetTmfe()) {
           struct.tmfe.write(oprot);
         }
+        if (struct.isSetTsnpe()) {
+          struct.tsnpe.write(oprot);
+        }
       }
 
       @Override
       public void read(org.apache.thrift.protocol.TProtocol prot, startScan_result struct) throws org.apache.thrift.TException {
         TTupleProtocol iprot = (TTupleProtocol) prot;
-        BitSet incoming = iprot.readBitSet(4);
+        BitSet incoming = iprot.readBitSet(5);
         if (incoming.get(0)) {
           struct.success = new org.apache.accumulo.core.data.thrift.InitialScan();
           struct.success.read(iprot);
@@ -6924,6 +7174,11 @@ import org.slf4j.LoggerFactory;
           struct.tmfe.read(iprot);
           struct.setTmfeIsSet(true);
         }
+        if (incoming.get(4)) {
+          struct.tsnpe = new TSampleNotPresentException();
+          struct.tsnpe.read(iprot);
+          struct.setTsnpeIsSet(true);
+        }
       }
     }
 
@@ -7393,6 +7648,7 @@ import org.slf4j.LoggerFactory;
     private static final org.apache.thrift.protocol.TField NSSI_FIELD_DESC = new org.apache.thrift.protocol.TField("nssi", org.apache.thrift.protocol.TType.STRUCT, (short)1);
     private static final org.apache.thrift.protocol.TField NSTE_FIELD_DESC = new org.apache.thrift.protocol.TField("nste", org.apache.thrift.protocol.TType.STRUCT, (short)2);
     private static final org.apache.thrift.protocol.TField TMFE_FIELD_DESC = new org.apache.thrift.protocol.TField("tmfe", org.apache.thrift.protocol.TType.STRUCT, (short)3);
+    private static final org.apache.thrift.protocol.TField TSNPE_FIELD_DESC = new org.apache.thrift.protocol.TField("tsnpe", org.apache.thrift.protocol.TType.STRUCT, (short)4);
 
     private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
     static {
@@ -7404,13 +7660,15 @@ import org.slf4j.LoggerFactory;
     public NoSuchScanIDException nssi; // required
     public NotServingTabletException nste; // required
     public TooManyFilesException tmfe; // required
+    public TSampleNotPresentException tsnpe; // required
 
     /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
     public enum _Fields implements org.apache.thrift.TFieldIdEnum {
       SUCCESS((short)0, "success"),
       NSSI((short)1, "nssi"),
       NSTE((short)2, "nste"),
-      TMFE((short)3, "tmfe");
+      TMFE((short)3, "tmfe"),
+      TSNPE((short)4, "tsnpe");
 
       private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -7433,6 +7691,8 @@ import org.slf4j.LoggerFactory;
             return NSTE;
           case 3: // TMFE
             return TMFE;
+          case 4: // TSNPE
+            return TSNPE;
           default:
             return null;
         }
@@ -7484,6 +7744,8 @@ import org.slf4j.LoggerFactory;
           new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT)));
       tmpMap.put(_Fields.TMFE, new org.apache.thrift.meta_data.FieldMetaData("tmfe", org.apache.thrift.TFieldRequirementType.DEFAULT, 
           new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT)));
+      tmpMap.put(_Fields.TSNPE, new org.apache.thrift.meta_data.FieldMetaData("tsnpe", org.apache.thrift.TFieldRequirementType.DEFAULT, 
+          new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT)));
       metaDataMap = Collections.unmodifiableMap(tmpMap);
       org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(continueScan_result.class, metaDataMap);
     }
@@ -7495,13 +7757,15 @@ import org.slf4j.LoggerFactory;
       org.apache.accumulo.core.data.thrift.ScanResult success,
       NoSuchScanIDException nssi,
       NotServingTabletException nste,
-      TooManyFilesException tmfe)
+      TooManyFilesException tmfe,
+      TSampleNotPresentException tsnpe)
     {
       this();
       this.success = success;
       this.nssi = nssi;
       this.nste = nste;
       this.tmfe = tmfe;
+      this.tsnpe = tsnpe;
     }
 
     /**
@@ -7520,6 +7784,9 @@ import org.slf4j.LoggerFactory;
       if (other.isSetTmfe()) {
         this.tmfe = new TooManyFilesException(other.tmfe);
       }
+      if (other.isSetTsnpe()) {
+        this.tsnpe = new TSampleNotPresentException(other.tsnpe);
+      }
     }
 
     public continueScan_result deepCopy() {
@@ -7532,6 +7799,7 @@ import org.slf4j.LoggerFactory;
       this.nssi = null;
       this.nste = null;
       this.tmfe = null;
+      this.tsnpe = null;
     }
 
     public org.apache.accumulo.core.data.thrift.ScanResult getSuccess() {
@@ -7630,6 +7898,30 @@ import org.slf4j.LoggerFactory;
       }
     }
 
+    public TSampleNotPresentException getTsnpe() {
+      return this.tsnpe;
+    }
+
+    public continueScan_result setTsnpe(TSampleNotPresentException tsnpe) {
+      this.tsnpe = tsnpe;
+      return this;
+    }
+
+    public void unsetTsnpe() {
+      this.tsnpe = null;
+    }
+
+    /** Returns true if field tsnpe is set (has been assigned a value) and false otherwise */
+    public boolean isSetTsnpe() {
+      return this.tsnpe != null;
+    }
+
+    public void setTsnpeIsSet(boolean value) {
+      if (!value) {
+        this.tsnpe = null;
+      }
+    }
+
     public void setFieldValue(_Fields field, Object value) {
       switch (field) {
       case SUCCESS:
@@ -7664,6 +7956,14 @@ import org.slf4j.LoggerFactory;
         }
         break;
 
+      case TSNPE:
+        if (value == null) {
+          unsetTsnpe();
+        } else {
+          setTsnpe((TSampleNotPresentException)value);
+        }
+        break;
+
       }
     }
 
@@ -7681,6 +7981,9 @@ import org.slf4j.LoggerFactory;
       case TMFE:
         return getTmfe();
 
+      case TSNPE:
+        return getTsnpe();
+
       }
       throw new IllegalStateException();
     }
@@ -7700,6 +8003,8 @@ import org.slf4j.LoggerFactory;
         return isSetNste();
       case TMFE:
         return isSetTmfe();
+      case TSNPE:
+        return isSetTsnpe();
       }
       throw new IllegalStateException();
     }
@@ -7753,6 +8058,15 @@ import org.slf4j.LoggerFactory;
           return false;
       }
 
+      boolean this_present_tsnpe = true && this.isSetTsnpe();
+      boolean that_present_tsnpe = true && that.isSetTsnpe();
+      if (this_present_tsnpe || that_present_tsnpe) {
+        if (!(this_present_tsnpe && that_present_tsnpe))
+          return false;
+        if (!this.tsnpe.equals(that.tsnpe))
+          return false;
+      }
+
       return true;
     }
 
@@ -7809,6 +8123,16 @@ import org.slf4j.LoggerFactory;
           return lastComparison;
         }
       }
+      lastComparison = Boolean.valueOf(isSetTsnpe()).compareTo(other.isSetTsnpe());
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+      if (isSetTsnpe()) {
+        lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tsnpe, other.tsnpe);
+        if (lastComparison != 0) {
+          return lastComparison;
+        }
+      }
       return 0;
     }
 
@@ -7860,6 +8184,14 @@ import org.slf4j.LoggerFactory;
         sb.append(this.tmfe);
       }
       first = false;
+      if (!first) sb.append(", ");
+      sb.append("tsnpe:");
+      if (this.tsnpe == null) {
+        sb.append("null");
+      } else {
+        sb.append(this.tsnpe);
+      }
+      first = false;
       sb.append(")");
       return sb.toString();
     }
@@ -7942,6 +8274,15 @@ import org.slf4j.LoggerFactory;
                 org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
               }
               break;
+            case 4: // TSNPE
+              if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) {
+                struct.tsnpe = new TSampleNotPresentException();
+                struct.tsnpe.read(iprot);
+                struct.setTsnpeIsSet(true);
+              } else { 
+                org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+              }
+              break;
             default:
               org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
           }
@@ -7977,6 +8318,11 @@ import org.slf4j.LoggerFactory;
           struct.tmfe.write(oprot);
           oprot.writeFieldEnd();
         }
+        if (struct.tsnpe != null) {
+          oprot.writeFieldBegin(TSNPE_FIELD_DESC);
+          struct.tsnpe.write(oprot);
+          oprot.writeFieldEnd();
+        }
         oprot.writeFieldStop();
         oprot.writeStructEnd();
       }
@@ -8007,7 +8353,10 @@ import org.slf4j.LoggerFactory;
         if (struct.isSetTmfe()) {
           optionals.set(3);
         }
-        oprot.writeBitSet(optionals, 4);
+        if (struct.isSetTsnpe()) {
+          optionals.set(4);
+        }
+        oprot.writeBitSet(optionals, 5);
         if (struct.isSetSuccess()) {
           struct.success.write(oprot);
         }
@@ -8020,12 +8369,15 @@ import org.slf4j.LoggerFactory;
         if (struct.isSetTmfe()) {
           struct.tmfe.write(oprot);
         }
+        if (struct.isSetTsnpe()) {
+          struct.tsnpe.write(oprot);
+        }
       }
 
       @Override
       public void read(org.apache.thrift.protocol.TProtocol prot, continueScan_result struct) throws org.apache.thrift.TException {
         TTupleProtocol iprot = (TTupleProtocol) prot;
-        BitSet incoming = iprot.readBitSet(4);
+        BitSet incoming = iprot.readBitSet(5);
         if (incoming.get(0)) {
           struct.success = new org.apache.accumulo.core.data.thrift.ScanResult();
           struct.success.read(iprot);
@@ -8046,6 +8398,11 @@ import org.slf4j.LoggerFactory;
           struct.tmfe.read(iprot);
           struct.setTmfeIsSet(true);
         }
+        if (incoming.get(4)) {
+          struct.tsnpe = new TSampleNotPresentException();
+          struct.tsnpe.read(iprot);
+          struct.setTsnpeIsSet(true);
+        }
       }
     }
 
@@ -8519,7 +8876,8 @@ import org.slf4j.LoggerFactory;
     private static final org.apache.thrift.protocol.TField SSIO_FIELD_DESC = new org.apache.thrift.protocol.TField("ssio", org.apache.thrift.protocol.TType.MAP, (short)5);
     private static final org.apache.thrift.protocol.TField AUTHORIZATIONS_FIELD_DESC = new org.apache.thrift.protocol.TField("authorizations", org.apache.thrift.protocol.TType.LIST, (short)6);
     private static final org.apache.thrift.protocol.TField WAIT_FOR_WRITES_FIELD_DESC = new org.apache.thrift.protocol.TField("waitForWrites", org.apache.thrift.protocol.TType.BOOL, (short)7);
-    private static final org.apache.thrift.protocol.TField BATCH_TIME_OUT_FIELD_DESC = new org.apache.thrift.protocol.TField("batchTimeOut", org.apache.thrift.protocol.TType.I64, (short)9);
+    private static final org.apache.thrift.protocol.TField SAMPLER_CONFIG_FIELD_DESC = new org.apache.thrift.protocol.TField("samplerConfig", org.apache.thrift.protocol.TType.STRUCT, (short)9);
+    private static final org.apache.thrift.protocol.TField BATCH_TIME_OUT_FIELD_DESC = new org.apache.thrift.protocol.TField("batchTimeOut", org.apache.thrift.protocol.TType.I64, (short)10);
 
     private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
     static {
@@ -8535,6 +8893,7 @@ import org.slf4j.LoggerFactory;
     public Map<String,Map<String,String>> ssio; // required
     public List<ByteBuffer> authorizations; // required
     public boolean waitForWrites; // required
+    public TSamplerConfiguration samplerConfig; // required
     public long batchTimeOut; // required
 
     /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
@@ -8547,7 +8906,8 @@ import org.slf4j.LoggerFactory;
       SSIO((short)5, "ssio"),
       AUTHORIZATIONS((short)6, "authorizations"),
       WAIT_FOR_WRITES((short)7, "waitForWrites"),
-      BATCH_TIME_OUT((short)9, "batchTimeOut");
+      SAMPLER_CONFIG((short)9, "samplerConfig"),
+      BATCH_TIME_OUT((short)10, "batchTimeOut");
 
       private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -8578,7 +8938,9 @@ import org.slf4j.LoggerFactory;
             return AUTHORIZATIONS;
           case 7: // WAIT_FOR_WRITES
             return WAIT_FOR_WRITES;
-          case 9: // BATCH_TIME_OUT
+          case 9: // SAMPLER_CONFIG
+            return SAMPLER_CONFIG;
+          case 10: // BATCH_TIME_OUT
             return BATCH_TIME_OUT;
           default:
             return null;
@@ -8649,6 +9011,8 @@ import org.slf4j.LoggerFactory;
               new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING              , true))));
       tmpMap.put(_Fields.WAIT_FOR_WRITES, new org.apache.thrift.meta_data.FieldMetaData("waitForWrites", org.apache.thrift.TFieldRequirementType.DEFAULT, 
           new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL)));
+      tmpMap.put(_Fields.SAMPLER_CONFIG, new org.apache.thrift.meta_data.FieldMetaData("samplerConfig", org.apache.thrift.TFieldRequirementType.DEFAULT, 
+          new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSamplerConfiguration.class)));
       tmpMap.put(

<TRUNCATED>

[7/7] accumulo git commit: ACCUMULO-3913 Added per table sampling

Posted by kt...@apache.org.
ACCUMULO-3913 Added per table sampling


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/45f18c17
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/45f18c17
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/45f18c17

Branch: refs/heads/master
Commit: 45f18c174612d1a41eed1d2eec9e77d3b3e71a82
Parents: fdcc169
Author: Keith Turner <kt...@apache.org>
Authored: Mon Sep 21 09:44:47 2015 -0400
Committer: Keith Turner <kt...@apache.org>
Committed: Mon Sep 21 09:46:39 2015 -0400

----------------------------------------------------------------------
 .../core/client/ClientSideIteratorScanner.java  |  161 +-
 .../core/client/SampleNotPresentException.java  |   42 +
 .../accumulo/core/client/ScannerBase.java       |   46 +
 .../client/admin/NewTableConfiguration.java     |   31 +
 .../core/client/admin/SamplerConfiguration.java |   91 +
 .../core/client/admin/TableOperations.java      |   29 +
 .../client/impl/BaseIteratorEnvironment.java    |   83 +
 .../core/client/impl/OfflineIterator.java       |   52 +-
 .../core/client/impl/ScannerIterator.java       |    6 +-
 .../core/client/impl/ScannerOptions.java        |   29 +-
 .../core/client/impl/TableOperationsImpl.java   |   39 +
 .../impl/TabletServerBatchReaderIterator.java   |   17 +-
 .../core/client/impl/ThriftScanner.java         |   25 +-
 .../core/client/mapred/AbstractInputFormat.java |   10 +
 .../client/mapred/AccumuloFileOutputFormat.java |   15 +
 .../core/client/mapred/InputFormatBase.java     |   19 +
 .../client/mapreduce/AbstractInputFormat.java   |   10 +
 .../mapreduce/AccumuloFileOutputFormat.java     |   15 +
 .../core/client/mapreduce/InputFormatBase.java  |   19 +
 .../core/client/mapreduce/InputTableConfig.java |   26 +
 .../core/client/mapreduce/RangeInputSplit.java  |   21 +
 .../core/client/mapreduce/impl/SplitUtils.java  |    2 +
 .../lib/impl/FileOutputConfigurator.java        |   29 +-
 .../mapreduce/lib/impl/InputConfigurator.java   |   53 +-
 .../core/client/mock/MockScannerBase.java       |   16 +
 .../core/client/mock/MockTableOperations.java   |   17 +
 .../core/compaction/CompactionSettings.java     |    1 +
 .../accumulo/core/compaction/NullType.java      |   29 +
 .../org/apache/accumulo/core/conf/Property.java |   12 +-
 .../accumulo/core/file/BloomFilterLayer.java    |    6 +
 .../accumulo/core/file/FileSKVIterator.java     |    5 +-
 .../core/file/map/MapFileOperations.java        |    6 +
 .../core/file/rfile/MultiIndexIterator.java     |    6 +
 .../core/file/rfile/MultiLevelIndex.java        |    4 +-
 .../accumulo/core/file/rfile/PrintInfo.java     |   28 +-
 .../apache/accumulo/core/file/rfile/RFile.java  |  511 +++--
 .../core/file/rfile/RFileOperations.java        |   12 +-
 .../core/iterators/IteratorEnvironment.java     |   50 +
 .../core/iterators/SortedMapIterator.java       |    4 +
 .../core/iterators/WrappingIterator.java        |    8 -
 .../core/iterators/system/EmptyIterator.java    |   72 +
 .../core/iterators/system/MapFileIterator.java  |    6 +
 .../core/iterators/system/SampleIterator.java   |   46 +
 .../iterators/system/SequenceFileIterator.java  |    6 +
 .../core/sample/AbstractHashSampler.java        |  106 ++
 .../accumulo/core/sample/RowColumnSampler.java  |  124 ++
 .../apache/accumulo/core/sample/RowSampler.java |   49 +
 .../apache/accumulo/core/sample/Sampler.java    |   57 +
 .../sample/impl/SamplerConfigurationImpl.java   |  184 ++
 .../core/sample/impl/SamplerFactory.java        |   48 +
 .../thrift/TSampleNotPresentException.java      |  409 ++++
 .../thrift/TSamplerConfiguration.java           |  556 ++++++
 .../thrift/TabletClientService.java             | 1762 ++++++++++++------
 .../accumulo/core/util/LocalityGroupUtil.java   |    4 +-
 core/src/main/thrift/tabletserver.thrift        |   19 +-
 .../client/impl/TableOperationsHelperTest.java  |   17 +
 .../mapred/AccumuloFileOutputFormatTest.java    |   14 +-
 .../mapreduce/AccumuloFileOutputFormatTest.java |   13 +
 .../core/file/rfile/MultiLevelIndexTest.java    |    3 +-
 .../accumulo/core/file/rfile/RFileTest.java     |  333 +++-
 .../iterators/DefaultIteratorEnvironment.java   |   25 +-
 .../iterators/FirstEntryInRowIteratorTest.java  |   37 +-
 .../core/iterators/SortedMapIteratorTest.java   |   46 +
 .../iterators/user/RowDeletingIteratorTest.java |   30 +-
 .../iterators/user/RowEncodingIteratorTest.java |   52 +-
 .../user/TransformingIteratorTest.java          |   41 +-
 .../apache/accumulo/core/file/rfile/ver_7.rf    |  Bin 0 -> 14557 bytes
 .../main/asciidoc/accumulo_user_manual.asciidoc |    2 +
 docs/src/main/asciidoc/chapters/sampling.txt    |   86 +
 docs/src/main/resources/examples/README         |    2 +
 docs/src/main/resources/examples/README.sample  |  192 ++
 .../examples/simple/sample/SampleExample.java   |  150 ++
 .../shard/CutoffIntersectingIterator.java       |  123 ++
 .../accumulo/examples/simple/shard/Query.java   |   31 +-
 .../server/util/VerifyTabletAssignments.java    |    2 +-
 .../iterators/MetadataBulkLoadFilterTest.java   |   25 +-
 .../server/replication/StatusCombinerTest.java  |   39 +-
 .../monitor/servlets/trace/NullScanner.java     |   11 +
 .../apache/accumulo/tserver/FileManager.java    |   30 +-
 .../apache/accumulo/tserver/InMemoryMap.java    |  244 ++-
 .../tserver/MemKeyConversionIterator.java       |    6 +-
 .../org/apache/accumulo/tserver/MemValue.java   |   63 +-
 .../org/apache/accumulo/tserver/NativeMap.java  |    4 +
 .../tserver/TabletIteratorEnvironment.java      |   61 +-
 .../apache/accumulo/tserver/TabletServer.java   |   38 +-
 .../ConfigurableCompactionStrategy.java         |   22 +
 .../accumulo/tserver/scan/LookupTask.java       |    5 +-
 .../accumulo/tserver/scan/NextBatchTask.java    |    5 +-
 .../tserver/session/MultiScanSession.java       |    5 +-
 .../accumulo/tserver/tablet/ScanDataSource.java |   24 +-
 .../accumulo/tserver/tablet/ScanOptions.java    |   16 +-
 .../apache/accumulo/tserver/tablet/Tablet.java  |   12 +-
 .../accumulo/tserver/tablet/TabletMemory.java   |    7 +-
 .../accumulo/tserver/InMemoryMapTest.java       |  383 +++-
 .../DefaultCompactionStrategyTest.java          |    6 +
 .../accumulo/shell/commands/CompactCommand.java |    6 +-
 .../accumulo/shell/commands/GrepCommand.java    |    2 +
 .../accumulo/shell/commands/ScanCommand.java    |   23 +
 start/.gitignore                                |    1 +
 .../test/InMemoryMapMemoryUsageTest.java        |    8 +-
 .../java/org/apache/accumulo/test/SampleIT.java |  497 +++++
 .../org/apache/accumulo/test/ShellServerIT.java |   72 +-
 .../accumulo/test/functional/ExamplesIT.java    |    4 +-
 .../accumulo/test/functional/ReadWriteIT.java   |    4 +-
 .../test/mapred/AccumuloFileOutputFormatIT.java |   18 +
 .../test/mapred/AccumuloInputFormatIT.java      |   57 +-
 .../mapreduce/AccumuloFileOutputFormatIT.java   |   18 +
 .../test/mapreduce/AccumuloInputFormatIT.java   |   49 +-
 .../test/performance/thrift/NullTserver.java    |    6 +-
 109 files changed, 6864 insertions(+), 1139 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java b/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java
index eb3c923..5dc6d59 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java
@@ -27,6 +27,7 @@ import java.util.TreeSet;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.ScannerOptions;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.ArrayByteSequence;
@@ -44,6 +45,8 @@ import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.hadoop.io.Text;
 
+import com.google.common.base.Preconditions;
+
 /**
  * A scanner that instantiates iterators on the client side instead of on the tablet server. This can be useful for testing iterators or in cases where you
  * don't want iterators affecting the performance of tablet servers.<br>
@@ -60,6 +63,7 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
   private Range range;
   private boolean isolated = false;
   private long readaheadThreshold = Constants.SCANNER_DEFAULT_READAHEAD_THRESHOLD;
+  private SamplerConfiguration iteratorSamplerConfig;
 
   /**
    * @deprecated since 1.7.0; was never intended for public use. However, this could have been used by anything extending this class.
@@ -67,7 +71,7 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
   @Deprecated
   public class ScannerTranslator extends ScannerTranslatorImpl {
     public ScannerTranslator(Scanner scanner) {
-      super(scanner);
+      super(scanner, scanner.getSamplerConfiguration());
     }
 
     @Override
@@ -76,6 +80,62 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
     }
   }
 
+  private class ClientSideIteratorEnvironment implements IteratorEnvironment {
+
+    private SamplerConfiguration samplerConfig;
+    private boolean sampleEnabled;
+
+    ClientSideIteratorEnvironment(boolean sampleEnabled, SamplerConfiguration samplerConfig) {
+      this.sampleEnabled = sampleEnabled;
+      this.samplerConfig = samplerConfig;
+    }
+
+    @Override
+    public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public AccumuloConfiguration getConfig() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public IteratorScope getIteratorScope() {
+      return IteratorScope.scan;
+    }
+
+    @Override
+    public boolean isFullMajorCompaction() {
+      return false;
+    }
+
+    @Override
+    public void registerSideChannel(SortedKeyValueIterator<Key,Value> iter) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Authorizations getAuthorizations() {
+      return ClientSideIteratorScanner.this.getAuthorizations();
+    }
+
+    @Override
+    public IteratorEnvironment cloneWithSamplingEnabled() {
+      return new ClientSideIteratorEnvironment(true, samplerConfig);
+    }
+
+    @Override
+    public boolean isSamplingEnabled() {
+      return sampleEnabled;
+    }
+
+    @Override
+    public SamplerConfiguration getSamplerConfiguration() {
+      return samplerConfig;
+    }
+  }
+
   /**
    * A class that wraps a Scanner in a SortedKeyValueIterator so that other accumulo iterators can use it as a source.
    */
@@ -83,6 +143,7 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
     protected Scanner scanner;
     Iterator<Entry<Key,Value>> iter;
     Entry<Key,Value> top = null;
+    private SamplerConfiguration samplerConfig;
 
     /**
      * Constructs an accumulo iterator from a scanner.
@@ -90,8 +151,9 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
      * @param scanner
      *          the scanner to iterate over
      */
-    public ScannerTranslatorImpl(final Scanner scanner) {
+    public ScannerTranslatorImpl(final Scanner scanner, SamplerConfiguration samplerConfig) {
       this.scanner = scanner;
+      this.samplerConfig = samplerConfig;
     }
 
     @Override
@@ -122,6 +184,13 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
       for (ByteSequence colf : columnFamilies) {
         scanner.fetchColumnFamily(new Text(colf.toArray()));
       }
+
+      if (samplerConfig == null) {
+        scanner.clearSamplerConfiguration();
+      } else {
+        scanner.setSamplerConfiguration(samplerConfig);
+      }
+
       iter = scanner.iterator();
       next();
     }
@@ -138,7 +207,7 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
 
     @Override
     public SortedKeyValueIterator<Key,Value> deepCopy(final IteratorEnvironment env) {
-      return new ScannerTranslatorImpl(scanner);
+      return new ScannerTranslatorImpl(scanner, env.isSamplingEnabled() ? env.getSamplerConfiguration() : null);
     }
   }
 
@@ -151,19 +220,22 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
    *          the source scanner
    */
   public ClientSideIteratorScanner(final Scanner scanner) {
-    smi = new ScannerTranslatorImpl(scanner);
+    smi = new ScannerTranslatorImpl(scanner, scanner.getSamplerConfiguration());
     this.range = scanner.getRange();
     this.size = scanner.getBatchSize();
     this.timeOut = scanner.getTimeout(TimeUnit.MILLISECONDS);
     this.batchTimeOut = scanner.getBatchTimeout(TimeUnit.MILLISECONDS);
     this.readaheadThreshold = scanner.getReadaheadThreshold();
+    SamplerConfiguration samplerConfig = scanner.getSamplerConfiguration();
+    if (samplerConfig != null)
+      setSamplerConfiguration(samplerConfig);
   }
 
   /**
    * Sets the source Scanner.
    */
   public void setSource(final Scanner scanner) {
-    smi = new ScannerTranslatorImpl(scanner);
+    smi = new ScannerTranslatorImpl(scanner, scanner.getSamplerConfiguration());
   }
 
   @Override
@@ -177,6 +249,8 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
     else
       smi.scanner.disableIsolation();
 
+    smi.samplerConfig = getSamplerConfiguration();
+
     final TreeMap<Integer,IterInfo> tm = new TreeMap<Integer,IterInfo>();
 
     for (IterInfo iterInfo : serverSideIteratorList) {
@@ -185,35 +259,8 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
 
     SortedKeyValueIterator<Key,Value> skvi;
     try {
-      skvi = IteratorUtil.loadIterators(smi, tm.values(), serverSideIteratorOptions, new IteratorEnvironment() {
-        @Override
-        public SortedKeyValueIterator<Key,Value> reserveMapFileReader(final String mapFileName) throws IOException {
-          return null;
-        }
-
-        @Override
-        public AccumuloConfiguration getConfig() {
-          return null;
-        }
-
-        @Override
-        public IteratorScope getIteratorScope() {
-          return null;
-        }
-
-        @Override
-        public boolean isFullMajorCompaction() {
-          return false;
-        }
-
-        @Override
-        public void registerSideChannel(final SortedKeyValueIterator<Key,Value> iter) {}
-
-        @Override
-        public Authorizations getAuthorizations() {
-          return smi.scanner.getAuthorizations();
-        }
-      }, false, null);
+      skvi = IteratorUtil.loadIterators(smi, tm.values(), serverSideIteratorOptions, new ClientSideIteratorEnvironment(getSamplerConfiguration() != null,
+          getIteratorSamplerConfigurationInternal()), false, null);
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
@@ -297,4 +344,50 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
     }
     this.readaheadThreshold = batches;
   }
+
+  private SamplerConfiguration getIteratorSamplerConfigurationInternal() {
+    SamplerConfiguration scannerSamplerConfig = getSamplerConfiguration();
+    if (scannerSamplerConfig != null) {
+      if (iteratorSamplerConfig != null && !iteratorSamplerConfig.equals(scannerSamplerConfig)) {
+        throw new IllegalStateException("Scanner and iterator sampler configuration differ");
+      }
+
+      return scannerSamplerConfig;
+    }
+
+    return iteratorSamplerConfig;
+  }
+
+  /**
+   * This is provided for the case where no sampler configuration is set on the scanner, but there is a need to create iterator deep copies that have sampling
+   * enabled. If sampler configuration is set on the scanner, then this method does not need to be called in order to create deep copies with sampling.
+   *
+   * <p>
+   * Setting this differently from the scanner's sampler configuration may cause exceptions.
+   *
+   * @since 1.8.0
+   */
+  public void setIteratorSamplerConfiguration(SamplerConfiguration sc) {
+    Preconditions.checkNotNull(sc);
+    this.iteratorSamplerConfig = sc;
+  }
+
+  /**
+   * Clear any iterator sampler configuration.
+   *
+   * @since 1.8.0
+   */
+  public void clearIteratorSamplerConfiguration() {
+    this.iteratorSamplerConfig = null;
+  }
+
+  /**
+   * @return currently set iterator sampler configuration.
+   *
+   * @since 1.8.0
+   */
+
+  public SamplerConfiguration getIteratorSamplerConfiguration() {
+    return iteratorSamplerConfig;
+  }
 }
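
A minimal usage sketch for the iterator sampler configuration above (not part of the commit): it assumes an existing Connector, a table named "ex", and that the table's sample data was built with RowSampler using the hasher/modulus option values shown, which are illustrative.

  import org.apache.accumulo.core.client.ClientSideIteratorScanner;
  import org.apache.accumulo.core.client.Connector;
  import org.apache.accumulo.core.client.Scanner;
  import org.apache.accumulo.core.client.TableNotFoundException;
  import org.apache.accumulo.core.client.admin.SamplerConfiguration;
  import org.apache.accumulo.core.sample.RowSampler;
  import org.apache.accumulo.core.security.Authorizations;

  class IteratorSamplingSketch {
    static ClientSideIteratorScanner wrap(Connector conn) throws TableNotFoundException {
      Scanner scanner = conn.createScanner("ex", Authorizations.EMPTY);
      ClientSideIteratorScanner csis = new ClientSideIteratorScanner(scanner);

      // The scanner itself still reads all data; iterator deep copies made with
      // an environment returned by cloneWithSamplingEnabled() use this config.
      SamplerConfiguration sc = new SamplerConfiguration(RowSampler.class.getName())
          .addOption("hasher", "murmur3_32").addOption("modulus", "1009");
      csis.setIteratorSamplerConfiguration(sc);
      return csis;
    }
  }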

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/SampleNotPresentException.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/SampleNotPresentException.java b/core/src/main/java/org/apache/accumulo/core/client/SampleNotPresentException.java
new file mode 100644
index 0000000..c70a898
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/SampleNotPresentException.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.client;
+
+/**
+ * Exception thrown when a table does not have sampling configured or when sampling is configured but it differs from what was requested.
+ *
+ * @since 1.8.0
+ */
+
+public class SampleNotPresentException extends RuntimeException {
+
+  public SampleNotPresentException(String message, Exception cause) {
+    super(message, cause);
+  }
+
+  public SampleNotPresentException(String message) {
+    super(message);
+  }
+
+  public SampleNotPresentException() {
+    super();
+  }
+
+  private static final long serialVersionUID = 1L;
+
+}
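
Since this is a RuntimeException thrown while iterating, callers reading sample data typically guard the scan loop. A short sketch; the fall-back to a full scan is one possible choice, not something the commit prescribes:

  import java.util.Map.Entry;

  import org.apache.accumulo.core.client.SampleNotPresentException;
  import org.apache.accumulo.core.client.Scanner;
  import org.apache.accumulo.core.data.Key;
  import org.apache.accumulo.core.data.Value;

  class SampleScanSketch {
    // Count sampled entries, falling back to a full scan if no matching sample exists.
    static long countEntries(Scanner scanner) {
      try {
        long count = 0;
        for (Entry<Key,Value> entry : scanner)
          count++;
        return count;
      } catch (SampleNotPresentException e) {
        // No sample data, or it was generated with a different sampler
        // configuration than the one set on the scanner; scan everything instead.
        scanner.clearSamplerConfiguration();
        long count = 0;
        for (Entry<Key,Value> entry : scanner)
          count++;
        return count;
      }
    }
  }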

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java b/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java
index e9d288b..5642785 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java
@@ -21,6 +21,7 @@ import java.util.Map.Entry;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.client.IteratorSetting.Column;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
@@ -176,6 +177,51 @@ public interface ScannerBase extends Iterable<Entry<Key,Value>> {
   Authorizations getAuthorizations();
 
   /**
+   * Setting this will cause the scanner to read sample data, as long as that sample data was generated with the given configuration. By default this is not set
+   * and all data is read.
+   *
+   * <p>
+   * One way to use this method is as follows, where the sampler configuration is obtained from the table configuration. Sample data can be generated in many
+   * different ways, so it's important to verify the sample data configuration meets expectations.
+   *
+   * <p>
+   *
+   * <pre>
+   * <code>
+   *   // could cache this if creating many scanners to avoid RPCs.
+   *   SamplerConfiguration samplerConfig = connector.tableOperations().getSamplerConfiguration(table);
+   *   // verify table's sample data is generated in an expected way before using
+   *   userCode.verifySamplerConfig(samplerConfig);
+   *   scanner.setSamplerConfiguration(samplerConfig);
+   * </code>
+   * </pre>
+   *
+   * <p>
+   * Of course this is not the only way to obtain a {@link SamplerConfiguration}; it could also be a constant, read from configuration, etc.
+   *
+   * <p>
+   * If sample data is not present or sample data was generated with a different configuration, then the scanner iterator will throw a
+   * {@link SampleNotPresentException}. Also, if a table's sampler configuration is changed while a scanner is iterating over a table, a
+   * {@link SampleNotPresentException} may be thrown.
+   *
+   * @since 1.8.0
+   */
+  void setSamplerConfiguration(SamplerConfiguration samplerConfig);
+
+  /**
+   * @return currently set sampler configuration. Returns null if no sampler configuration is set.
+   * @since 1.8.0
+   */
+  SamplerConfiguration getSamplerConfiguration();
+
+  /**
+   * Clears the sampler configuration, making the scanner read all data. After calling this, {@link #getSamplerConfiguration()} should return null.
+   *
+   * @since 1.8.0
+   */
+  void clearSamplerConfiguration();
+
+  /**
   * This setting determines how long a scanner will wait to fill the returned batch. By default, a scanner waits until the batch is full.
    *
    * <p>
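
The Javadoc example above pulls the configuration from the table; it can equally be built directly when the expected sampler is known. A brief sketch (RowSampler and the option values are illustrative of the samplers added elsewhere in this commit):

  import org.apache.accumulo.core.client.Scanner;
  import org.apache.accumulo.core.client.admin.SamplerConfiguration;
  import org.apache.accumulo.core.sample.RowSampler;

  class ScannerSamplingSketch {
    static void scanSampleThenAll(Scanner scanner) {
      SamplerConfiguration sc = new SamplerConfiguration(RowSampler.class.getName())
          .addOption("hasher", "murmur3_32").addOption("modulus", "1009");

      scanner.setSamplerConfiguration(sc);   // iterate only over the sample
      // ... scan ...
      scanner.clearSamplerConfiguration();   // back to reading all data
    }
  }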

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java b/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java
index 4db1d89..2107dc8 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java
@@ -24,6 +24,9 @@ import java.util.Map;
 
 import org.apache.accumulo.core.iterators.IteratorUtil;
 import org.apache.accumulo.core.iterators.user.VersioningIterator;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
+
+import com.google.common.base.Preconditions;
 
 /**
  * This object stores table creation parameters. Currently includes: {@link TimeType}, whether to include default iterators, and user-specified initial
@@ -39,6 +42,7 @@ public class NewTableConfiguration {
   private boolean limitVersion = true;
 
   private Map<String,String> properties = new HashMap<String,String>();
+  private SamplerConfiguration samplerConfiguration;
 
   /**
    * Configure logical or millisecond time for tables created with this configuration.
@@ -84,6 +88,7 @@ public class NewTableConfiguration {
    */
   public NewTableConfiguration setProperties(Map<String,String> prop) {
     checkArgument(prop != null, "properties is null");
+    checkDisjoint(prop, samplerConfiguration);
 
     this.properties = new HashMap<String,String>(prop);
     return this;
@@ -101,7 +106,33 @@ public class NewTableConfiguration {
       propertyMap.putAll(IteratorUtil.generateInitialTableProperties(limitVersion));
     }
 
+    if (samplerConfiguration != null) {
+      propertyMap.putAll(new SamplerConfigurationImpl(samplerConfiguration).toTablePropertiesMap());
+    }
+
     propertyMap.putAll(properties);
     return Collections.unmodifiableMap(propertyMap);
   }
+
+  private void checkDisjoint(Map<String,String> props, SamplerConfiguration samplerConfiguration) {
+    if (props.isEmpty() || samplerConfiguration == null) {
+      return;
+    }
+
+    Map<String,String> sampleProps = new SamplerConfigurationImpl(samplerConfiguration).toTablePropertiesMap();
+
+    checkArgument(Collections.disjoint(props.keySet(), sampleProps.keySet()), "Properties and derived sampler properties are not disjoint");
+  }
+
+  /**
+   * Enable building a sample data set on the new table using the given sampler configuration.
+   *
+   * @since 1.8.0
+   */
+  public NewTableConfiguration enableSampling(SamplerConfiguration samplerConfiguration) {
+    Preconditions.checkNotNull(samplerConfiguration);
+    checkDisjoint(properties, samplerConfiguration);
+    this.samplerConfiguration = samplerConfiguration;
+    return this;
+  }
 }
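
A brief sketch of creating a table with sampling enabled through this class (the table name is illustrative; RowSampler and its options come from elsewhere in this commit):

  import org.apache.accumulo.core.client.AccumuloException;
  import org.apache.accumulo.core.client.AccumuloSecurityException;
  import org.apache.accumulo.core.client.Connector;
  import org.apache.accumulo.core.client.TableExistsException;
  import org.apache.accumulo.core.client.admin.NewTableConfiguration;
  import org.apache.accumulo.core.client.admin.SamplerConfiguration;
  import org.apache.accumulo.core.sample.RowSampler;

  class CreateSampledTableSketch {
    static void create(Connector conn) throws AccumuloException, AccumuloSecurityException, TableExistsException {
      SamplerConfiguration sc = new SamplerConfiguration(RowSampler.class.getName())
          .addOption("hasher", "murmur3_32").addOption("modulus", "1009");

      // enableSampling folds the sampler settings into the table properties
      // via SamplerConfigurationImpl.toTablePropertiesMap(), shown above.
      conn.tableOperations().create("sampex", new NewTableConfiguration().enableSampling(sc));
    }
  }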

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java b/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java
new file mode 100644
index 0000000..079d324
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.client.admin;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class encapsulates the configuration and options needed to set up and use sampling.
+ *
+ * @since 1.8.0
+ */
+
+public class SamplerConfiguration {
+
+  private String className;
+  private Map<String,String> options = new HashMap<>();
+
+  public SamplerConfiguration(String samplerClassName) {
+    Preconditions.checkNotNull(samplerClassName);
+    this.className = samplerClassName;
+  }
+
+  public SamplerConfiguration setOptions(Map<String,String> options) {
+    Preconditions.checkNotNull(options);
+    this.options = new HashMap<>(options.size());
+
+    for (Entry<String,String> entry : options.entrySet()) {
+      addOption(entry.getKey(), entry.getValue());
+    }
+
+    return this;
+  }
+
+  public SamplerConfiguration addOption(String option, String value) {
+    checkArgument(option != null, "option is null");
+    checkArgument(value != null, "value is null");
+    this.options.put(option, value);
+    return this;
+  }
+
+  public Map<String,String> getOptions() {
+    return Collections.unmodifiableMap(options);
+  }
+
+  public String getSamplerClassName() {
+    return className;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof SamplerConfiguration) {
+      SamplerConfiguration osc = (SamplerConfiguration) o;
+
+      return className.equals(osc.className) && options.equals(osc.options);
+    }
+
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return className.hashCode() + 31 * options.hashCode();
+  }
+
+  @Override
+  public String toString() {
+    return className + " " + options;
+  }
+}
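
A short usage sketch of the new class; the sampler class name and option keys below are hypothetical:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.accumulo.core.client.admin.SamplerConfiguration;

    public class SamplerConfigDemo {
      public static void main(String[] args) {
        Map<String,String> opts = new HashMap<>();
        opts.put("modulus", "101"); // hypothetical option key/value

        // setOptions copies the given map; addOption appends a single entry.
        SamplerConfiguration sc = new SamplerConfiguration("com.example.MySampler")
            .setOptions(opts).addOption("hasher", "murmur3_32");

        System.out.println(sc.getSamplerClassName()); // com.example.MySampler
        System.out.println(sc.getOptions());          // modulus and hasher entries (map order not guaranteed)
      }
    }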

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java b/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java
index b7c70e9..fa6fef4 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java
@@ -756,4 +756,33 @@ public interface TableOperations {
    */
   boolean testClassLoad(String tableName, final String className, final String asTypeName) throws AccumuloException, AccumuloSecurityException,
       TableNotFoundException;
+
+  /**
+   * Set or update the sampler configuration for a table. If the table has an existing sampler configuration, those properties will be cleared before the new
+   * properties are set.
+   *
+   * @param tableName
+   *          the name of the table
+   * @since 1.8.0
+   */
+  void setSamplerConfiguration(String tableName, SamplerConfiguration samplerConfiguration) throws TableNotFoundException, AccumuloException,
+      AccumuloSecurityException;
+
+  /**
+   * Clear all sampling configuration properties on the table.
+   *
+   * @param tableName
+   *          the name of the table
+   * @since 1.8.0
+   */
+  void clearSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException, AccumuloSecurityException;
+
+  /**
+   * Reads the sampling configuration properties for a table.
+   *
+   * @param tableName
+   *          the name of the table
+   * @since 1.8.0
+   */
+  SamplerConfiguration getSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException, AccumuloSecurityException;
 }
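
A sketch of the three new TableOperations methods in use, assuming a hypothetical sampler class:

    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.core.client.admin.SamplerConfiguration;

    public class SamplerAdminDemo {
      static void reconfigure(Connector conn, String table) throws Exception {
        // Hypothetical sampler class for illustration.
        SamplerConfiguration sc = new SamplerConfiguration("com.example.MySampler");

        // Clears any existing sampler properties, then sets the new ones.
        conn.tableOperations().setSamplerConfiguration(table, sc);

        // Reads the properties back; returns null when no sampler is configured.
        SamplerConfiguration current = conn.tableOperations().getSamplerConfiguration(table);
        System.out.println(current);

        // Removes all sampler-related table properties.
        conn.tableOperations().clearSamplerConfiguration(table);
      }
    }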

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java b/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java
new file mode 100644
index 0000000..dc138ce
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.client.impl;
+
+import java.io.IOException;
+
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.security.Authorizations;
+
+/**
+ * An implementation of {@link IteratorEnvironment} that throws {@link UnsupportedOperationException} for every operation. This is useful as a base class for
+ * implementations that only need to override a subset of the methods.
+ */
+
+public class BaseIteratorEnvironment implements IteratorEnvironment {
+
+  @Override
+  public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public AccumuloConfiguration getConfig() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public IteratorScope getIteratorScope() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public boolean isFullMajorCompaction() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void registerSideChannel(SortedKeyValueIterator<Key,Value> iter) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Authorizations getAuthorizations() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public boolean isSamplingEnabled() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public SamplerConfiguration getSamplerConfiguration() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public IteratorEnvironment cloneWithSamplingEnabled() {
+    throw new UnsupportedOperationException();
+  }
+
+}
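
A sketch of the intended extension pattern: override only what a given test or tool needs, and inherit the throwing behavior for everything else:

    import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment;
    import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
    import org.apache.accumulo.core.security.Authorizations;

    // Only the two methods this environment actually serves are overridden;
    // every other method keeps the inherited UnsupportedOperationException.
    class ScanTimeIteratorEnvironment extends BaseIteratorEnvironment {
      @Override
      public IteratorScope getIteratorScope() {
        return IteratorScope.scan;
      }

      @Override
      public Authorizations getAuthorizations() {
        return Authorizations.EMPTY;
      }
    }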

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java b/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java
index 793b044..9cce089 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java
@@ -16,6 +16,8 @@
  */
 package org.apache.accumulo.core.client.impl;
 
+import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -30,8 +32,10 @@ import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.Instance;
 import org.apache.accumulo.core.client.RowIterator;
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.Scanner;
 import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.ConfigurationCopy;
 import org.apache.accumulo.core.conf.Property;
@@ -57,6 +61,7 @@ import org.apache.accumulo.core.master.state.tables.TableState;
 import org.apache.accumulo.core.metadata.MetadataTable;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.security.ColumnVisibility;
 import org.apache.accumulo.core.util.CachedConfiguration;
@@ -68,16 +73,20 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.io.Text;
 
-import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
-
 class OfflineIterator implements Iterator<Entry<Key,Value>> {
 
   static class OfflineIteratorEnvironment implements IteratorEnvironment {
 
     private final Authorizations authorizations;
+    private AccumuloConfiguration conf;
+    private boolean useSample;
+    private SamplerConfiguration sampleConf;
 
-    public OfflineIteratorEnvironment(Authorizations auths) {
+    public OfflineIteratorEnvironment(Authorizations auths, AccumuloConfiguration acuTableConf, boolean useSample, SamplerConfiguration samplerConf) {
       this.authorizations = auths;
+      this.conf = acuTableConf;
+      this.useSample = useSample;
+      this.sampleConf = samplerConf;
     }
 
     @Override
@@ -87,7 +96,7 @@ class OfflineIterator implements Iterator<Entry<Key,Value>> {
 
     @Override
     public AccumuloConfiguration getConfig() {
-      return AccumuloConfiguration.getDefaultConfiguration();
+      return conf;
     }
 
     @Override
@@ -119,6 +128,23 @@ class OfflineIterator implements Iterator<Entry<Key,Value>> {
       allIters.add(iter);
       return new MultiIterator(allIters, false);
     }
+
+    @Override
+    public boolean isSamplingEnabled() {
+      return useSample;
+    }
+
+    @Override
+    public SamplerConfiguration getSamplerConfiguration() {
+      return sampleConf;
+    }
+
+    @Override
+    public IteratorEnvironment cloneWithSamplingEnabled() {
+      if (sampleConf == null)
+        throw new SampleNotPresentException();
+      return new OfflineIteratorEnvironment(authorizations, conf, true, sampleConf);
+    }
   }
 
   private SortedKeyValueIterator<Key,Value> iter;
@@ -154,6 +180,8 @@ class OfflineIterator implements Iterator<Entry<Key,Value>> {
         nextTablet();
 
     } catch (Exception e) {
+      if (e instanceof RuntimeException)
+        throw (RuntimeException) e;
       throw new RuntimeException(e);
     }
   }
@@ -306,16 +334,30 @@ class OfflineIterator implements Iterator<Entry<Key,Value>> {
 
     readers.clear();
 
+    SamplerConfiguration scannerSamplerConfig = options.getSamplerConfiguration();
+    SamplerConfigurationImpl scannerSamplerConfigImpl = scannerSamplerConfig == null ? null : new SamplerConfigurationImpl(scannerSamplerConfig);
+    SamplerConfigurationImpl samplerConfImpl = SamplerConfigurationImpl.newSamplerConfig(acuTableConf);
+
+    if (scannerSamplerConfigImpl != null && (samplerConfImpl == null || !scannerSamplerConfigImpl.equals(samplerConfImpl))) {
+      throw new SampleNotPresentException();
+    }
+
     // TODO need to close files - ACCUMULO-1303
     for (String file : absFiles) {
       FileSystem fs = VolumeConfiguration.getVolume(file, conf, config).getFileSystem();
       FileSKVIterator reader = FileOperations.getInstance().openReader(file, false, fs, conf, acuTableConf, null, null);
+      if (scannerSamplerConfigImpl != null) {
+        reader = reader.getSample(scannerSamplerConfigImpl);
+        if (reader == null)
+          throw new SampleNotPresentException();
+      }
       readers.add(reader);
     }
 
     MultiIterator multiIter = new MultiIterator(readers, extent);
 
-    OfflineIteratorEnvironment iterEnv = new OfflineIteratorEnvironment(authorizations);
+    OfflineIteratorEnvironment iterEnv = new OfflineIteratorEnvironment(authorizations, acuTableConf, false, samplerConfImpl == null ? null
+        : samplerConfImpl.toSamplerConfiguration());
 
     DeletingIterator delIter = new DeletingIterator(multiIter, false);
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerIterator.java b/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerIterator.java
index 764db21..55b0a85 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerIterator.java
@@ -28,6 +28,7 @@ import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
@@ -90,7 +91,8 @@ public class ScannerIterator implements Iterator<Entry<Key,Value>> {
           synchQ.add(currentBatch);
           return;
         }
-      } catch (IsolationException | ScanTimedOutException | AccumuloException | AccumuloSecurityException | TableDeletedException | TableOfflineException e) {
+      } catch (IsolationException | ScanTimedOutException | AccumuloException | AccumuloSecurityException | TableDeletedException | TableOfflineException
+          | SampleNotPresentException e) {
         log.trace("{}", e.getMessage(), e);
         synchQ.add(e);
       } catch (TableNotFoundException e) {
@@ -119,7 +121,7 @@ public class ScannerIterator implements Iterator<Entry<Key,Value>> {
     }
 
     scanState = new ScanState(context, tableId, authorizations, new Range(range), options.fetchedColumns, size, options.serverSideIteratorList,
-        options.serverSideIteratorOptions, isolated, readaheadThreshold, options.batchTimeOut);
+        options.serverSideIteratorOptions, isolated, readaheadThreshold, options.getSamplerConfiguration(), options.batchTimeOut);
 
     // If we want to start readahead immediately, don't wait for hasNext to be called
     if (0l == readaheadThreshold) {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java b/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java
index cc337dd..8d96464 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java
@@ -32,6 +32,7 @@ import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.ScannerBase;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Column;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
@@ -40,6 +41,8 @@ import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.TextUtil;
 import org.apache.hadoop.io.Text;
 
+import com.google.common.base.Preconditions;
+
 public class ScannerOptions implements ScannerBase {
 
   protected List<IterInfo> serverSideIteratorList = Collections.emptyList();
@@ -53,6 +56,8 @@ public class ScannerOptions implements ScannerBase {
 
   private String regexIterName = null;
 
+  private SamplerConfiguration samplerConfig = null;
+
   protected ScannerOptions() {}
 
   public ScannerOptions(ScannerOptions so) {
@@ -168,6 +173,8 @@ public class ScannerOptions implements ScannerBase {
         Set<Entry<String,Map<String,String>>> es = src.serverSideIteratorOptions.entrySet();
         for (Entry<String,Map<String,String>> entry : es)
           dst.serverSideIteratorOptions.put(entry.getKey(), new HashMap<String,String>(entry.getValue()));
+
+        dst.samplerConfig = src.samplerConfig;
         dst.batchTimeOut = src.batchTimeOut;
       }
     }
@@ -179,7 +186,7 @@ public class ScannerOptions implements ScannerBase {
   }
 
   @Override
-  public void setTimeout(long timeout, TimeUnit timeUnit) {
+  public synchronized void setTimeout(long timeout, TimeUnit timeUnit) {
     if (timeout < 0) {
       throw new IllegalArgumentException("TimeOut must be positive : " + timeout);
     }
@@ -191,7 +198,7 @@ public class ScannerOptions implements ScannerBase {
   }
 
   @Override
-  public long getTimeout(TimeUnit timeunit) {
+  public synchronized long getTimeout(TimeUnit timeunit) {
     return timeunit.convert(timeOut, TimeUnit.MILLISECONDS);
   }
 
@@ -201,11 +208,27 @@ public class ScannerOptions implements ScannerBase {
   }
 
   @Override
-  public Authorizations getAuthorizations() {
+  public synchronized Authorizations getAuthorizations() {
     throw new UnsupportedOperationException("No authorizations to return");
   }
 
   @Override
+  public synchronized void setSamplerConfiguration(SamplerConfiguration samplerConfig) {
+    Preconditions.checkNotNull(samplerConfig);
+    this.samplerConfig = samplerConfig;
+  }
+
+  @Override
+  public synchronized SamplerConfiguration getSamplerConfiguration() {
+    return samplerConfig;
+  }
+
+  @Override
+  public synchronized void clearSamplerConfiguration() {
+    this.samplerConfig = null;
+  }
+
+  @Override
   public void setBatchTimeout(long timeout, TimeUnit timeUnit) {
     if (timeout < 0) {
       throw new IllegalArgumentException("Batch timeout must be positive : " + timeout);
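
Client-side, the new ScannerBase sampler methods are expected to be used along these lines (the table and sampler configuration are assumptions):

    import java.util.Map.Entry;

    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.core.client.Scanner;
    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.security.Authorizations;

    public class SampleScanDemo {
      static void scanSample(Connector conn, String table, SamplerConfiguration sc) throws Exception {
        Scanner scanner = conn.createScanner(table, Authorizations.EMPTY);
        // Restrict the scan to the sample data set built with sc; if the files'
        // sample was built with a different config, SampleNotPresentException results.
        scanner.setSamplerConfiguration(sc);
        for (Entry<Key,Value> entry : scanner)
          System.out.println(entry.getKey() + " -> " + entry.getValue());
      }
    }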

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java b/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java
index d65bcec..8434f2f 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java
@@ -67,6 +67,7 @@ import org.apache.accumulo.core.client.admin.CompactionConfig;
 import org.apache.accumulo.core.client.admin.DiskUsage;
 import org.apache.accumulo.core.client.admin.FindMax;
 import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.admin.TableOperations;
 import org.apache.accumulo.core.client.admin.TimeType;
 import org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation;
@@ -95,6 +96,7 @@ import org.apache.accumulo.core.metadata.MetadataTable;
 import org.apache.accumulo.core.metadata.RootTable;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 import org.apache.accumulo.core.rpc.ThriftUtil;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
@@ -1474,4 +1476,41 @@ public class TableOperationsImpl extends TableOperationsHelper {
     }
   }
 
+  private void clearSamplerOptions(String tableName) throws AccumuloException, TableNotFoundException, AccumuloSecurityException {
+    String prefix = Property.TABLE_SAMPLER_OPTS.getKey();
+    for (Entry<String,String> entry : getProperties(tableName)) {
+      String property = entry.getKey();
+      if (property.startsWith(prefix)) {
+        removeProperty(tableName, property);
+      }
+    }
+  }
+
+  @Override
+  public void setSamplerConfiguration(String tableName, SamplerConfiguration samplerConfiguration) throws AccumuloException, TableNotFoundException,
+      AccumuloSecurityException {
+    clearSamplerOptions(tableName);
+
+    List<Pair<String,String>> props = new SamplerConfigurationImpl(samplerConfiguration).toTableProperties();
+    for (Pair<String,String> pair : props) {
+      setProperty(tableName, pair.getFirst(), pair.getSecond());
+    }
+  }
+
+  @Override
+  public void clearSamplerConfiguration(String tableName) throws AccumuloException, TableNotFoundException, AccumuloSecurityException {
+    removeProperty(tableName, Property.TABLE_SAMPLER.getKey());
+    clearSamplerOptions(tableName);
+  }
+
+  @Override
+  public SamplerConfiguration getSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException {
+    AccumuloConfiguration conf = new ConfigurationCopy(this.getProperties(tableName));
+    SamplerConfigurationImpl sci = SamplerConfigurationImpl.newSamplerConfig(conf);
+    if (sci == null) {
+      return null;
+    }
+    return sci.toSamplerConfiguration();
+  }
+
 }
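
Since the implementation above maps the sampler configuration onto plain table properties (via Property.TABLE_SAMPLER and Property.TABLE_SAMPLER_OPTS), a property-level equivalent of setSamplerConfiguration looks roughly like the sketch below; the exact option key spelling under the options prefix is an assumption:

    import org.apache.accumulo.core.client.Connector;

    public class SamplerPropsDemo {
      static void setViaProperties(Connector conn, String table) throws Exception {
        // Property-level view of what setSamplerConfiguration does above;
        // the option key spelling is an assumption for illustration.
        conn.tableOperations().setProperty(table, "table.sampler", "com.example.MySampler");
        conn.tableOperations().setProperty(table, "table.sampler.opt.modulus", "1009");
      }
    }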

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/TabletServerBatchReaderIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/TabletServerBatchReaderIterator.java b/core/src/main/java/org/apache/accumulo/core/client/impl/TabletServerBatchReaderIterator.java
index 0b09808..1ff56b9 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/TabletServerBatchReaderIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/TabletServerBatchReaderIterator.java
@@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicLong;
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
 import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
@@ -56,8 +57,10 @@ import org.apache.accumulo.core.data.thrift.TKeyValue;
 import org.apache.accumulo.core.data.thrift.TRange;
 import org.apache.accumulo.core.master.state.tables.TableState;
 import org.apache.accumulo.core.rpc.ThriftUtil;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException;
+import org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
 import org.apache.accumulo.core.trace.Tracer;
 import org.apache.accumulo.core.util.ByteBufferUtil;
@@ -375,6 +378,8 @@ public class TabletServerBatchReaderIterator implements Iterator<Entry<Key,Value
           fatalException = new TableDeletedException(table);
         else
           fatalException = e;
+      } catch (SampleNotPresentException e) {
+        fatalException = e;
       } catch (Throwable t) {
         if (queryThreadPool.isShutdown())
           log.debug("Caught exception, but queryThreadPool is shutdown", t);
@@ -643,7 +648,8 @@ public class TabletServerBatchReaderIterator implements Iterator<Entry<Key,Value
             Translators.RT));
         InitialMultiScan imsr = client.startMultiScan(Tracer.traceInfo(), context.rpcCreds(), thriftTabletRanges,
             Translator.translate(columns, Translators.CT), options.serverSideIteratorList, options.serverSideIteratorOptions,
-            ByteBufferUtil.toByteBuffers(authorizations.getAuthorizations()), waitForWrites, options.batchTimeOut);
+            ByteBufferUtil.toByteBuffers(authorizations.getAuthorizations()), waitForWrites,
+            SamplerConfigurationImpl.toThrift(options.getSamplerConfiguration()), options.batchTimeOut);
         if (waitForWrites)
           ThriftScanner.serversWaitedForWrites.get(ttype).add(server.toString());
 
@@ -719,6 +725,15 @@ public class TabletServerBatchReaderIterator implements Iterator<Entry<Key,Value
     } catch (NoSuchScanIDException e) {
       log.debug("Server : {} msg : {}", server, e.getMessage(), e);
       throw new IOException(e);
+    } catch (TSampleNotPresentException e) {
+      log.debug("Server : {} msg : {}", server, e.getMessage(), e);
+      String tableInfo = "?";
+      if (e.getExtent() != null) {
+        String tableId = new KeyExtent(e.getExtent()).getTableId().toString();
+        tableInfo = Tables.getPrintableTableInfoFromId(context.getInstance(), tableId);
+      }
+      String message = "Table " + tableInfo + " does not have sampling configured or built";
+      throw new SampleNotPresentException(message, e);
     } catch (TException e) {
       log.debug("Server : {} msg : {}", server, e.getMessage(), e);
       timeoutTracker.errorOccured(e);
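
With SampleNotPresentException now propagated as a fatal exception, batch scan callers can handle it explicitly; a hedged sketch:

    import java.util.Collections;
    import java.util.Map.Entry;

    import org.apache.accumulo.core.client.BatchScanner;
    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.core.client.SampleNotPresentException;
    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.security.Authorizations;

    public class SampleBatchScanDemo {
      static void scan(Connector conn, String table, SamplerConfiguration sc) throws Exception {
        BatchScanner bs = conn.createBatchScanner(table, Authorizations.EMPTY, 4);
        try {
          bs.setRanges(Collections.singleton(new Range()));
          bs.setSamplerConfiguration(sc);
          for (Entry<Key,Value> entry : bs)
            System.out.println(entry.getKey());
        } catch (SampleNotPresentException e) {
          // Thrown when a tablet's files lack sample data or the sample
          // was built with a different configuration.
          System.err.println("sample not present: " + e.getMessage());
        } finally {
          bs.close();
        }
      }
    }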

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java b/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java
index c2cc1e3..52f3330 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java
@@ -32,9 +32,11 @@ import org.apache.accumulo.core.Constants;
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
 import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation;
 import org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException;
 import org.apache.accumulo.core.data.Column;
@@ -50,9 +52,11 @@ import org.apache.accumulo.core.data.thrift.ScanResult;
 import org.apache.accumulo.core.data.thrift.TKeyValue;
 import org.apache.accumulo.core.master.state.tables.TableState;
 import org.apache.accumulo.core.rpc.ThriftUtil;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException;
 import org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException;
+import org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
 import org.apache.accumulo.core.tabletserver.thrift.TooManyFilesException;
 import org.apache.accumulo.core.trace.Span;
@@ -92,13 +96,13 @@ public class ThriftScanner {
       try {
        // not reading whole rows (or stopping on row boundaries) so there is no need to enable isolation below
         ScanState scanState = new ScanState(context, extent.getTableId(), authorizations, range, fetchedColumns, size, serverSideIteratorList,
-            serverSideIteratorOptions, false, Constants.SCANNER_DEFAULT_READAHEAD_THRESHOLD, batchTimeOut);
+            serverSideIteratorOptions, false, Constants.SCANNER_DEFAULT_READAHEAD_THRESHOLD, null, batchTimeOut);
 
         TabletType ttype = TabletType.type(extent);
         boolean waitForWrites = !serversWaitedForWrites.get(ttype).contains(server);
         InitialScan isr = client.startScan(tinfo, scanState.context.rpcCreds(), extent.toThrift(), scanState.range.toThrift(),
             Translator.translate(scanState.columns, Translators.CT), scanState.size, scanState.serverSideIteratorList, scanState.serverSideIteratorOptions,
-            scanState.authorizations.getAuthorizationsBB(), waitForWrites, scanState.isolated, scanState.readaheadThreshold, scanState.batchTimeOut);
+            scanState.authorizations.getAuthorizationsBB(), waitForWrites, scanState.isolated, scanState.readaheadThreshold, null, scanState.batchTimeOut);
         if (waitForWrites)
           serversWaitedForWrites.get(ttype).add(server);
 
@@ -153,9 +157,11 @@ public class ThriftScanner {
 
     Map<String,Map<String,String>> serverSideIteratorOptions;
 
+    SamplerConfiguration samplerConfig;
+
     public ScanState(ClientContext context, Text tableId, Authorizations authorizations, Range range, SortedSet<Column> fetchedColumns, int size,
         List<IterInfo> serverSideIteratorList, Map<String,Map<String,String>> serverSideIteratorOptions, boolean isolated, long readaheadThreshold,
-        long batchTimeOut) {
+        SamplerConfiguration samplerConfig, long batchTimeOut) {
       this.context = context;
 
       this.authorizations = authorizations;
@@ -183,6 +189,9 @@ public class ThriftScanner {
 
       this.isolated = isolated;
       this.readaheadThreshold = readaheadThreshold;
+
+      this.samplerConfig = samplerConfig;
+
       this.batchTimeOut = batchTimeOut;
     }
   }
@@ -288,6 +297,10 @@ public class ThriftScanner {
           throw e;
         } catch (TApplicationException tae) {
           throw new AccumuloServerException(loc.tablet_location, tae);
+        } catch (TSampleNotPresentException tsnpe) {
+          String message = "Table " + Tables.getPrintableTableInfoFromId(instance, scanState.tableId.toString())
+              + " does not have sampling configured or built";
+          throw new SampleNotPresentException(message, tsnpe);
         } catch (NotServingTabletException e) {
           error = "Scan failed, not serving tablet " + loc;
           if (!error.equals(lastError))
@@ -377,7 +390,7 @@ public class ThriftScanner {
   }
 
   private static List<KeyValue> scan(TabletLocation loc, ScanState scanState, ClientContext context) throws AccumuloSecurityException,
-      NotServingTabletException, TException, NoSuchScanIDException, TooManyFilesException {
+      NotServingTabletException, TException, NoSuchScanIDException, TooManyFilesException, TSampleNotPresentException {
     if (scanState.finished)
       return null;
 
@@ -408,9 +421,11 @@ public class ThriftScanner {
 
         TabletType ttype = TabletType.type(loc.tablet_extent);
         boolean waitForWrites = !serversWaitedForWrites.get(ttype).contains(loc.tablet_location);
+
         InitialScan is = client.startScan(tinfo, scanState.context.rpcCreds(), loc.tablet_extent.toThrift(), scanState.range.toThrift(),
             Translator.translate(scanState.columns, Translators.CT), scanState.size, scanState.serverSideIteratorList, scanState.serverSideIteratorOptions,
-            scanState.authorizations.getAuthorizationsBB(), waitForWrites, scanState.isolated, scanState.readaheadThreshold, scanState.batchTimeOut);
+            scanState.authorizations.getAuthorizationsBB(), waitForWrites, scanState.isolated, scanState.readaheadThreshold,
+            SamplerConfigurationImpl.toThrift(scanState.samplerConfig), scanState.batchTimeOut);
         if (waitForWrites)
           serversWaitedForWrites.get(ttype).add(loc.tablet_location);
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
index d11639e..b581deb 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
@@ -43,6 +43,7 @@ import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
 import org.apache.accumulo.core.client.admin.DelegationTokenConfig;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.admin.SecurityOperations;
 import org.apache.accumulo.core.client.impl.AuthenticationTokenIdentifier;
 import org.apache.accumulo.core.client.impl.ClientContext;
@@ -574,6 +575,15 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
         }
       }
 
+      SamplerConfiguration samplerConfig = baseSplit.getSamplerConfiguration();
+      if (null == samplerConfig) {
+        samplerConfig = tableConfig.getSamplerConfiguration();
+      }
+
+      if (samplerConfig != null) {
+        scannerBase.setSamplerConfiguration(samplerConfig);
+      }
+
       scannerIterator = scannerBase.iterator();
       numKeysRead = 0;
     }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
index 0eb304f..b383f3e 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
@@ -19,6 +19,7 @@ package org.apache.accumulo.core.client.mapred;
 import java.io.IOException;
 import java.util.Arrays;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
@@ -140,6 +141,20 @@ public class AccumuloFileOutputFormat extends FileOutputFormat<Key,Value> {
     FileOutputConfigurator.setReplication(CLASS, job, replication);
   }
 
+  /**
+   * Specify a sampler to be used when writing out data. This will result in the output file having sample data.
+   *
+   * @param job
+   *          The Hadoop job instance to be configured
+   * @param samplerConfig
+   *          The configuration for creating sample data in the output file.
+   * @since 1.8.0
+   */
+
+  public static void setSampler(JobConf job, SamplerConfiguration samplerConfig) {
+    FileOutputConfigurator.setSampler(CLASS, job, samplerConfig);
+  }
+
   @Override
   public RecordWriter<Key,Value> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
     // get the path of the temporary output file
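
A sketch of configuring a mapred job with the new setSampler method; the output path and job wiring are assumptions:

    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.client.mapred.AccumuloFileOutputFormat;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.JobConf;

    public class RFileOutputJobDemo {
      static void configure(JobConf job, SamplerConfiguration sc) {
        job.setOutputFormat(AccumuloFileOutputFormat.class);
        // Output path is an assumption for illustration.
        AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp/bulk-out"));
        // Files the job writes will also carry the sample set described by sc.
        AccumuloFileOutputFormat.setSampler(job, sc);
      }
    }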

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
index ffb02a9..a9403a5 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
@@ -25,7 +25,9 @@ import org.apache.accumulo.core.client.ClientSideIteratorScanner;
 import org.apache.accumulo.core.client.IsolatedScanner;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.ScannerBase;
 import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.TabletLocator;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
 import org.apache.accumulo.core.data.Key;
@@ -338,6 +340,23 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   }
 
   /**
+   * Causes the input format to read sample data. If the sample data was created using a different configuration, or a table's sampler configuration changes
+   * while reading data, then the input format will throw an error.
+   *
+   * @param job
+   *          the Hadoop job instance to be configured
+   * @param samplerConfig
+   *          The sampler configuration that the sample data must have been created with in order for reading sample data to succeed.
+   *
+   * @since 1.8.0
+   * @see ScannerBase#setSamplerConfiguration(SamplerConfiguration)
+   */
+  public static void setSamplerConfiguration(JobConf job, SamplerConfiguration samplerConfig) {
+    InputConfigurator.setSamplerConfiguration(CLASS, job, samplerConfig);
+  }
+
+  /**
    * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
    *
    * @param job
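
A sketch of wiring the new option into a mapred job via AccumuloInputFormat, which extends this class; the table name is an assumption and connection/auth setup is omitted:

    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class SampleInputJobDemo {
      static void configure(JobConf job, SamplerConfiguration sc) {
        // Table name is an assumption; connector/token setup omitted.
        AccumuloInputFormat.setInputTableName(job, "mytable");
        // Mappers then see only sample data; a sampler config mismatch causes
        // the job to fail rather than silently reading non-sample data.
        AccumuloInputFormat.setSamplerConfiguration(job, sc);
      }
    }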

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
index 7db67c7..0e51f03 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
@@ -43,6 +43,7 @@ import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
 import org.apache.accumulo.core.client.admin.DelegationTokenConfig;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.admin.SecurityOperations;
 import org.apache.accumulo.core.client.impl.AuthenticationTokenIdentifier;
 import org.apache.accumulo.core.client.impl.ClientContext;
@@ -604,6 +605,15 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
         }
       }
 
+      SamplerConfiguration samplerConfig = split.getSamplerConfiguration();
+      if (null == samplerConfig) {
+        samplerConfig = tableConfig.getSamplerConfiguration();
+      }
+
+      if (samplerConfig != null) {
+        scannerBase.setSamplerConfiguration(samplerConfig);
+      }
+
       scannerIterator = scannerBase.iterator();
       numKeysRead = 0;
     }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
index abd96b6..7d4c0e2 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
@@ -19,6 +19,7 @@ package org.apache.accumulo.core.client.mapreduce;
 import java.io.IOException;
 import java.util.Arrays;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.Property;
@@ -138,6 +139,20 @@ public class AccumuloFileOutputFormat extends FileOutputFormat<Key,Value> {
     FileOutputConfigurator.setReplication(CLASS, job.getConfiguration(), replication);
   }
 
+  /**
+   * Specify a sampler to be used when writing out data. This will result in the output file having sample data.
+   *
+   * @param job
+   *          The Hadoop job instance to be configured
+   * @param samplerConfig
+   *          The configuration for creating sample data in the output file.
+   * @since 1.8.0
+   */
+
+  public static void setSampler(Job job, SamplerConfiguration samplerConfig) {
+    FileOutputConfigurator.setSampler(CLASS, job.getConfiguration(), samplerConfig);
+  }
+
   @Override
   public RecordWriter<Key,Value> getRecordWriter(TaskAttemptContext context) throws IOException {
     // get the path of the temporary output file

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
index 6ab8a19..e5a0b90 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
@@ -25,7 +25,9 @@ import org.apache.accumulo.core.client.ClientSideIteratorScanner;
 import org.apache.accumulo.core.client.IsolatedScanner;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.ScannerBase;
 import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.TabletLocator;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
 import org.apache.accumulo.core.data.Key;
@@ -337,6 +339,23 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   }
 
   /**
+   * Causes the input format to read sample data. If the sample data was created using a different configuration, or a table's sampler configuration changes
+   * while reading data, then the input format will throw an error.
+   *
+   * @param job
+   *          the Hadoop job instance to be configured
+   * @param samplerConfig
+   *          The sampler configuration that the sample data must have been created with in order for reading sample data to succeed.
+   *
+   * @since 1.8.0
+   * @see ScannerBase#setSamplerConfiguration(SamplerConfiguration)
+   */
+  public static void setSamplerConfiguration(Job job, SamplerConfiguration samplerConfig) {
+    InputConfigurator.setSamplerConfiguration(CLASS, job.getConfiguration(), samplerConfig);
+  }
+
+  /**
    * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
    *
    * @param context

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
index 257f6c9..51ad7eb 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
@@ -25,6 +25,8 @@ import java.util.HashSet;
 import java.util.List;
 
 import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.ScannerBase;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.util.Pair;
 import org.apache.hadoop.io.Text;
@@ -43,6 +45,7 @@ public class InputTableConfig implements Writable {
   private boolean useLocalIterators = false;
   private boolean useIsolatedScanners = false;
   private boolean offlineScan = false;
+  private SamplerConfiguration samplerConfig = null;
 
   public InputTableConfig() {}
 
@@ -241,6 +244,26 @@ public class InputTableConfig implements Writable {
     return useIsolatedScanners;
   }
 
+  /**
+   * Set the sampler configuration to use when reading data from this table.
+   *
+   * @see ScannerBase#setSamplerConfiguration(SamplerConfiguration)
+   * @see InputFormatBase#setSamplerConfiguration(org.apache.hadoop.mapreduce.Job, SamplerConfiguration)
+   *
+   * @since 1.8.0
+   */
+  public void setSamplerConfiguration(SamplerConfiguration samplerConfiguration) {
+    this.samplerConfig = samplerConfiguration;
+  }
+
+  /**
+   * @return the sampler configuration set on this table config, or null if none has been set
+   *
+   * @since 1.8.0
+   */
+  public SamplerConfiguration getSamplerConfiguration() {
+    return samplerConfig;
+  }
+
   @Override
   public void write(DataOutput dataOutput) throws IOException {
     if (iterators != null) {
@@ -340,6 +363,8 @@ public class InputTableConfig implements Writable {
       return false;
     if (ranges != null ? !ranges.equals(that.ranges) : that.ranges != null)
       return false;
+    if (samplerConfig != null ? !samplerConfig.equals(that.samplerConfig) : that.samplerConfig != null)
+      return false;
     return true;
   }
 
@@ -352,6 +377,7 @@ public class InputTableConfig implements Writable {
     result = 31 * result + (useLocalIterators ? 1 : 0);
     result = 31 * result + (useIsolatedScanners ? 1 : 0);
     result = 31 * result + (offlineScan ? 1 : 0);
+    result = 31 * result + (samplerConfig == null ? 0 : samplerConfig.hashCode());
     return result;
   }
 }
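
A sketch of per-table sampler configuration with the multi-table input format; the table names are assumptions:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.client.mapreduce.AccumuloMultiTableInputFormat;
    import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
    import org.apache.hadoop.mapreduce.Job;

    public class MultiTableSampleDemo {
      static void configure(Job job, SamplerConfiguration sc) {
        InputTableConfig sampled = new InputTableConfig();
        sampled.setSamplerConfiguration(sc); // read this table's sample data

        InputTableConfig full = new InputTableConfig(); // read all of this table

        Map<String,InputTableConfig> configs = new HashMap<>();
        configs.put("table1", sampled);
        configs.put("table2", full);
        AccumuloMultiTableInputFormat.setInputTableConfigs(job, configs);
      }
    }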

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
index f3e17c6..b4f9dca 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
@@ -32,6 +32,7 @@ import org.apache.accumulo.core.client.ClientConfiguration;
 import org.apache.accumulo.core.client.Instance;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapreduce.impl.SplitUtils;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.TokenSource;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
@@ -41,6 +42,7 @@ import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.PartialKey;
 import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.Base64;
 import org.apache.accumulo.core.util.DeprecationUtil;
@@ -64,6 +66,7 @@ public class RangeInputSplit extends InputSplit implements Writable {
   private Authorizations auths;
   private Set<Pair<Text,Text>> fetchedColumns;
   private List<IteratorSetting> iterators;
+  private SamplerConfiguration samplerConfig;
   private Level level;
 
   public RangeInputSplit() {
@@ -215,6 +218,10 @@ public class RangeInputSplit extends InputSplit implements Writable {
     if (in.readBoolean()) {
       level = Level.toLevel(in.readInt());
     }
+
+    if (in.readBoolean()) {
+      samplerConfig = new SamplerConfigurationImpl(in).toSamplerConfiguration();
+    }
   }
 
   @Override
@@ -301,6 +308,11 @@ public class RangeInputSplit extends InputSplit implements Writable {
     if (null != level) {
       out.writeInt(level.toInt());
     }
+
+    out.writeBoolean(null != samplerConfig);
+    if (null != samplerConfig) {
+      new SamplerConfigurationImpl(samplerConfig).write(out);
+    }
   }
 
   /**
@@ -510,6 +522,15 @@ public class RangeInputSplit extends InputSplit implements Writable {
     sb.append(" fetchColumns: ").append(fetchedColumns);
     sb.append(" iterators: ").append(iterators);
     sb.append(" logLevel: ").append(level);
+    sb.append(" samplerConfig: ").append(samplerConfig);
     return sb.toString();
   }
+
+  public void setSamplerConfiguration(SamplerConfiguration samplerConfiguration) {
+    this.samplerConfig = samplerConfiguration;
+  }
+
+  public SamplerConfiguration getSamplerConfiguration() {
+    return samplerConfig;
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/SplitUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/SplitUtils.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/SplitUtils.java
index 68268fc..b81b064 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/SplitUtils.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/SplitUtils.java
@@ -50,6 +50,8 @@ public class SplitUtils {
     split.setFetchedColumns(tableConfig.getFetchedColumns());
     split.setIterators(tableConfig.getIterators());
     split.setLogLevel(logLevel);
+
+    split.setSamplerConfiguration(tableConfig.getSamplerConfiguration());
   }
 
   public static float getProgress(ByteSequence start, ByteSequence end, ByteSequence position) {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
index 882c6d3..65248c5 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
@@ -17,11 +17,15 @@
 package org.apache.accumulo.core.client.mapreduce.lib.impl;
 
 import java.util.Arrays;
+import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.ConfigurationCopy;
 import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.hadoop.conf.Configuration;
 
 /**
@@ -97,8 +101,17 @@ public class FileOutputConfigurator extends ConfiguratorBase {
     String prefix = enumToConfKey(implementingClass, Opts.ACCUMULO_PROPERTIES) + ".";
     ConfigurationCopy acuConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
     for (Entry<String,String> entry : conf)
-      if (entry.getKey().startsWith(prefix))
-        acuConf.set(Property.getPropertyByKey(entry.getKey().substring(prefix.length())), entry.getValue());
+      if (entry.getKey().startsWith(prefix)) {
+        String propString = entry.getKey().substring(prefix.length());
+        Property prop = Property.getPropertyByKey(propString);
+        if (prop != null) {
+          acuConf.set(prop, entry.getValue());
+        } else if (Property.isValidTablePropertyKey(propString)) {
+          acuConf.set(propString, entry.getValue());
+        } else {
+          throw new IllegalArgumentException("Unknown accumulo file property " + propString);
+        }
+      }
     return acuConf;
   }
 
@@ -184,4 +197,16 @@ public class FileOutputConfigurator extends ConfiguratorBase {
     setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_REPLICATION, replication);
   }
 
+  /**
+   * @since 1.8.0
+   */
+  public static void setSampler(Class<?> implementingClass, Configuration conf, SamplerConfiguration samplerConfig) {
+    Map<String,String> props = new SamplerConfigurationImpl(samplerConfig).toTablePropertiesMap();
+
+    Set<Entry<String,String>> es = props.entrySet();
+    for (Entry<String,String> entry : es) {
+      conf.set(enumToConfKey(implementingClass, Opts.ACCUMULO_PROPERTIES) + "." + entry.getKey(), entry.getValue());
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
index efda7d9..6ba34af 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
@@ -46,6 +46,7 @@ import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.RowIterator;
 import org.apache.accumulo.core.client.Scanner;
 import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.ClientContext;
 import org.apache.accumulo.core.client.impl.Credentials;
 import org.apache.accumulo.core.client.impl.DelegationTokenImpl;
@@ -62,6 +63,7 @@ import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.master.state.tables.TableState;
 import org.apache.accumulo.core.metadata.MetadataTable;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.security.TablePermission;
 import org.apache.accumulo.core.util.Base64;
@@ -74,6 +76,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.StringUtils;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
 
 /**
@@ -87,7 +90,7 @@ public class InputConfigurator extends ConfiguratorBase {
    * @since 1.6.0
    */
   public static enum ScanOpts {
-    TABLE_NAME, AUTHORIZATIONS, RANGES, COLUMNS, ITERATORS, TABLE_CONFIGS
+    TABLE_NAME, AUTHORIZATIONS, RANGES, COLUMNS, ITERATORS, TABLE_CONFIGS, SAMPLER_CONFIG
   }
 
   /**
@@ -805,6 +808,11 @@ public class InputConfigurator extends ConfiguratorBase {
       if (ranges != null)
         queryConfig.setRanges(ranges);
 
+      SamplerConfiguration samplerConfig = getSamplerConfiguration(implementingClass, conf);
+      if (samplerConfig != null) {
+        queryConfig.setSamplerConfiguration(samplerConfig);
+      }
+
       queryConfig.setAutoAdjustRanges(getAutoAdjustRanges(implementingClass, conf)).setUseIsolatedScanners(isIsolated(implementingClass, conf))
           .setUseLocalIterators(usesLocalIterators(implementingClass, conf)).setOfflineScan(isOfflineScan(implementingClass, conf));
       return Maps.immutableEntry(tableName, queryConfig);
@@ -901,4 +909,47 @@ public class InputConfigurator extends ConfiguratorBase {
     }
     return binnedRanges;
   }
+
+  private static String toBase64(Writable writable) {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    try {
+      writable.write(dos);
+      dos.close();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+
+    return Base64.encodeBase64String(baos.toByteArray());
+  }
+
+  private static <T extends Writable> T fromBase64(T writable, String enc) {
+    ByteArrayInputStream bais = new ByteArrayInputStream(Base64.decodeBase64(enc));
+    DataInputStream dis = new DataInputStream(bais);
+    try {
+      writable.readFields(dis);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    return writable;
+  }
+
+  public static void setSamplerConfiguration(Class<?> implementingClass, Configuration conf, SamplerConfiguration samplerConfig) {
+    Preconditions.checkNotNull(samplerConfig);
+
+    String key = enumToConfKey(implementingClass, ScanOpts.SAMPLER_CONFIG);
+    String val = toBase64(new SamplerConfigurationImpl(samplerConfig));
+
+    conf.set(key, val);
+  }
+
+  public static SamplerConfiguration getSamplerConfiguration(Class<?> implementingClass, Configuration conf) {
+    String key = enumToConfKey(implementingClass, ScanOpts.SAMPLER_CONFIG);
+
+    String encodedSC = conf.get(key);
+    if (encodedSC == null)
+      return null;
+
+    return fromBase64(new SamplerConfigurationImpl(), encodedSC).toSamplerConfiguration();
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java b/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java
index f81e9dd..45b65e9 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java
@@ -24,6 +24,7 @@ import java.util.Iterator;
 import java.util.Map.Entry;
 
 import org.apache.accumulo.core.client.ScannerBase;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.ScannerOptions;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.ArrayByteSequence;
@@ -112,6 +113,21 @@ public class MockScannerBase extends ScannerOptions implements ScannerBase {
       allIters.add(iter);
       return new MultiIterator(allIters, false);
     }
+
+    @Override
+    public boolean isSamplingEnabled() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public SamplerConfiguration getSamplerConfiguration() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public IteratorEnvironment cloneWithSamplingEnabled() {
+      throw new UnsupportedOperationException();
+    }
   }
 
   public SortedKeyValueIterator<Key,Value> createFilter(SortedKeyValueIterator<Key,Value> inner) throws IOException {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java b/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java
index 0712f22..7ca5766 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java
@@ -40,6 +40,7 @@ import org.apache.accumulo.core.client.admin.CompactionConfig;
 import org.apache.accumulo.core.client.admin.DiskUsage;
 import org.apache.accumulo.core.client.admin.FindMax;
 import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.admin.TimeType;
 import org.apache.accumulo.core.client.impl.TableOperationsHelper;
 import org.apache.accumulo.core.client.impl.Tables;
@@ -480,4 +481,20 @@ class MockTableOperations extends TableOperationsHelper {
     }
     return true;
   }
+
+  @Override
+  public void setSamplerConfiguration(String tableName, SamplerConfiguration samplerConfiguration) throws TableNotFoundException, AccumuloException,
+      AccumuloSecurityException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void clearSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public SamplerConfiguration getSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
+    throw new UnsupportedOperationException();
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/compaction/CompactionSettings.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/compaction/CompactionSettings.java b/core/src/main/java/org/apache/accumulo/core/compaction/CompactionSettings.java
index 43f8c0f..1c5369e 100644
--- a/core/src/main/java/org/apache/accumulo/core/compaction/CompactionSettings.java
+++ b/core/src/main/java/org/apache/accumulo/core/compaction/CompactionSettings.java
@@ -21,6 +21,7 @@ import java.util.Map;
 
 public enum CompactionSettings {
 
+  SF_NO_SAMPLE(new NullType()),
   SF_GT_ESIZE_OPT(new SizeType()),
   SF_LT_ESIZE_OPT(new SizeType()),
   SF_NAME_RE_OPT(new PatternType()),


[3/7] accumulo git commit: ACCUMULO-3913 Added per table sampling

Posted by kt...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/util/LocalityGroupUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/util/LocalityGroupUtil.java b/core/src/main/java/org/apache/accumulo/core/util/LocalityGroupUtil.java
index a4936cf..07757a6 100644
--- a/core/src/main/java/org/apache/accumulo/core/util/LocalityGroupUtil.java
+++ b/core/src/main/java/org/apache/accumulo/core/util/LocalityGroupUtil.java
@@ -186,11 +186,11 @@ public class LocalityGroupUtil {
     return ecf;
   }
 
-  private static class PartitionedMutation extends Mutation {
+  public static class PartitionedMutation extends Mutation {
     private byte[] row;
     private List<ColumnUpdate> updates;
 
-    PartitionedMutation(byte[] row, List<ColumnUpdate> updates) {
+    public PartitionedMutation(byte[] row, List<ColumnUpdate> updates) {
       this.row = row;
       this.updates = updates;
     }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/thrift/tabletserver.thrift
----------------------------------------------------------------------
diff --git a/core/src/main/thrift/tabletserver.thrift b/core/src/main/thrift/tabletserver.thrift
index 051daee..27b72f2 100644
--- a/core/src/main/thrift/tabletserver.thrift
+++ b/core/src/main/thrift/tabletserver.thrift
@@ -31,6 +31,10 @@ exception TooManyFilesException {
   1:data.TKeyExtent extent
 }
 
+exception TSampleNotPresentException {
+  1:data.TKeyExtent extent
+}
+
 exception NoSuchScanIDException {
 }
 
@@ -136,6 +140,11 @@ struct IteratorConfig {
    1:list<TIteratorSetting> iterators;
 }
 
+struct TSamplerConfiguration {
+   1:string className
+   2:map<string, string> options
+}
+
 service TabletClientService extends client.ClientService {
   // scan a range of keys
   data.InitialScan startScan(11:trace.TInfo tinfo,
@@ -150,9 +159,10 @@ service TabletClientService extends client.ClientService {
                              9:bool waitForWrites,
                              10:bool isolated,
                              12:i64 readaheadThreshold,
-                             13:i64 batchTimeOut)  throws (1:client.ThriftSecurityException sec, 2:NotServingTabletException nste, 3:TooManyFilesException tmfe),
+                             13:TSamplerConfiguration samplerConfig,
+                             14:i64 batchTimeOut)  throws (1:client.ThriftSecurityException sec, 2:NotServingTabletException nste, 3:TooManyFilesException tmfe, 4:TSampleNotPresentException tsnpe),
                              
-  data.ScanResult continueScan(2:trace.TInfo tinfo, 1:data.ScanID scanID)  throws (1:NoSuchScanIDException nssi, 2:NotServingTabletException nste, 3:TooManyFilesException tmfe),
+  data.ScanResult continueScan(2:trace.TInfo tinfo, 1:data.ScanID scanID)  throws (1:NoSuchScanIDException nssi, 2:NotServingTabletException nste, 3:TooManyFilesException tmfe, 4:TSampleNotPresentException tsnpe),
   oneway void closeScan(2:trace.TInfo tinfo, 1:data.ScanID scanID),
 
   // scan over a series of ranges
@@ -164,8 +174,9 @@ service TabletClientService extends client.ClientService {
                                   5:map<string, map<string, string>> ssio,
                                   6:list<binary> authorizations,
                                   7:bool waitForWrites,
-                                  9:i64 batchTimeOut)  throws (1:client.ThriftSecurityException sec),
-  data.MultiScanResult continueMultiScan(2:trace.TInfo tinfo, 1:data.ScanID scanID) throws (1:NoSuchScanIDException nssi),
+                                  9:TSamplerConfiguration samplerConfig,
+                                  10:i64 batchTimeOut)  throws (1:client.ThriftSecurityException sec, 2:TSampleNotPresentException tsnpe),
+  data.MultiScanResult continueMultiScan(2:trace.TInfo tinfo, 1:data.ScanID scanID) throws (1:NoSuchScanIDException nssi, 2:TSampleNotPresentException tsnpe),
   void closeMultiScan(2:trace.TInfo tinfo, 1:data.ScanID scanID) throws (1:NoSuchScanIDException nssi),
   
   //the following calls support a batch update to multiple tablets on a tablet server

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java b/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java
index 7a56d1d..7bf9eb1 100644
--- a/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java
@@ -36,6 +36,7 @@ import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.admin.CompactionConfig;
 import org.apache.accumulo.core.client.admin.DiskUsage;
 import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.admin.TimeType;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
@@ -226,6 +227,22 @@ public class TableOperationsHelperTest {
         TableNotFoundException {
       return false;
     }
+
+    @Override
+    public void setSamplerConfiguration(String tableName, SamplerConfiguration samplerConfiguration) throws TableNotFoundException, AccumuloException,
+        AccumuloSecurityException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void clearSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public SamplerConfiguration getSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
+      throw new UnsupportedOperationException();
+    }
   }
 
   protected TableOperationsHelper getHelper() {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java
index bcf8a24..d88453e 100644
--- a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java
@@ -20,9 +20,12 @@ import static org.junit.Assert.assertEquals;
 
 import java.io.IOException;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.sample.RowSampler;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.hadoop.mapred.JobConf;
 import org.junit.Test;
 
@@ -36,6 +39,9 @@ public class AccumuloFileOutputFormatTest {
     long c = 50l;
     long d = 10l;
     String e = "snappy";
+    SamplerConfiguration samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
+    samplerConfig.addOption("hasher", "murmur3_32");
+    samplerConfig.addOption("modulus", "109");
 
     JobConf job = new JobConf();
     AccumuloFileOutputFormat.setReplication(job, a);
@@ -43,6 +49,7 @@ public class AccumuloFileOutputFormatTest {
     AccumuloFileOutputFormat.setDataBlockSize(job, c);
     AccumuloFileOutputFormat.setIndexBlockSize(job, d);
     AccumuloFileOutputFormat.setCompressionType(job, e);
+    AccumuloFileOutputFormat.setSampler(job, samplerConfig);
 
     AccumuloConfiguration acuconf = FileOutputConfigurator.getAccumuloConfiguration(AccumuloFileOutputFormat.class, job);
 
@@ -51,12 +58,16 @@ public class AccumuloFileOutputFormatTest {
     assertEquals(50l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
     assertEquals(10l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
     assertEquals("snappy", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
+    assertEquals(new SamplerConfigurationImpl(samplerConfig), SamplerConfigurationImpl.newSamplerConfig(acuconf));
 
     a = 17;
     b = 1300l;
     c = 150l;
     d = 110l;
     e = "lzo";
+    samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
+    samplerConfig.addOption("hasher", "md5");
+    samplerConfig.addOption("modulus", "100003");
 
     job = new JobConf();
     AccumuloFileOutputFormat.setReplication(job, a);
@@ -64,6 +75,7 @@ public class AccumuloFileOutputFormatTest {
     AccumuloFileOutputFormat.setDataBlockSize(job, c);
     AccumuloFileOutputFormat.setIndexBlockSize(job, d);
     AccumuloFileOutputFormat.setCompressionType(job, e);
+    AccumuloFileOutputFormat.setSampler(job, samplerConfig);
 
     acuconf = FileOutputConfigurator.getAccumuloConfiguration(AccumuloFileOutputFormat.class, job);
 
@@ -72,6 +84,6 @@ public class AccumuloFileOutputFormatTest {
     assertEquals(150l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
     assertEquals(110l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
     assertEquals("lzo", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
-
+    assertEquals(new SamplerConfigurationImpl(samplerConfig), SamplerConfigurationImpl.newSamplerConfig(acuconf));
   }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java
index 3923566..cf0c8d6 100644
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java
@@ -20,9 +20,12 @@ import static org.junit.Assert.assertEquals;
 
 import java.io.IOException;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.sample.RowSampler;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.hadoop.mapreduce.Job;
 import org.junit.Test;
 
@@ -36,6 +39,9 @@ public class AccumuloFileOutputFormatTest {
     long c = 50l;
     long d = 10l;
     String e = "snappy";
+    SamplerConfiguration samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
+    samplerConfig.addOption("hasher", "murmur3_32");
+    samplerConfig.addOption("modulus", "109");
 
     Job job1 = Job.getInstance();
     AccumuloFileOutputFormat.setReplication(job1, a);
@@ -43,6 +49,7 @@ public class AccumuloFileOutputFormatTest {
     AccumuloFileOutputFormat.setDataBlockSize(job1, c);
     AccumuloFileOutputFormat.setIndexBlockSize(job1, d);
     AccumuloFileOutputFormat.setCompressionType(job1, e);
+    AccumuloFileOutputFormat.setSampler(job1, samplerConfig);
 
     AccumuloConfiguration acuconf = FileOutputConfigurator.getAccumuloConfiguration(AccumuloFileOutputFormat.class, job1.getConfiguration());
 
@@ -51,12 +58,16 @@ public class AccumuloFileOutputFormatTest {
     assertEquals(50l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
     assertEquals(10l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
     assertEquals("snappy", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
+    assertEquals(new SamplerConfigurationImpl(samplerConfig), SamplerConfigurationImpl.newSamplerConfig(acuconf));
 
     a = 17;
     b = 1300l;
     c = 150l;
     d = 110l;
     e = "lzo";
+    samplerConfig = new SamplerConfiguration(RowSampler.class.getName());
+    samplerConfig.addOption("hasher", "md5");
+    samplerConfig.addOption("modulus", "100003");
 
     Job job2 = Job.getInstance();
     AccumuloFileOutputFormat.setReplication(job2, a);
@@ -64,6 +75,7 @@ public class AccumuloFileOutputFormatTest {
     AccumuloFileOutputFormat.setDataBlockSize(job2, c);
     AccumuloFileOutputFormat.setIndexBlockSize(job2, d);
     AccumuloFileOutputFormat.setCompressionType(job2, e);
+    AccumuloFileOutputFormat.setSampler(job2, samplerConfig);
 
     acuconf = FileOutputConfigurator.getAccumuloConfiguration(AccumuloFileOutputFormat.class, job2.getConfiguration());
 
@@ -72,6 +84,7 @@ public class AccumuloFileOutputFormatTest {
     assertEquals(150l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
     assertEquals(110l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
     assertEquals("lzo", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
+    assertEquals(new SamplerConfigurationImpl(samplerConfig), SamplerConfigurationImpl.newSamplerConfig(acuconf));
 
   }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/file/rfile/MultiLevelIndexTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/file/rfile/MultiLevelIndexTest.java b/core/src/test/java/org/apache/accumulo/core/file/rfile/MultiLevelIndexTest.java
index 6f89454..66978dd 100644
--- a/core/src/test/java/org/apache/accumulo/core/file/rfile/MultiLevelIndexTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/file/rfile/MultiLevelIndexTest.java
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.util.Random;
 
 import junit.framework.TestCase;
-
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.file.blockfile.ABlockWriter;
@@ -77,7 +76,7 @@ public class MultiLevelIndexTest extends TestCase {
     FSDataInputStream in = new FSDataInputStream(bais);
     CachableBlockFile.Reader _cbr = new CachableBlockFile.Reader(in, data.length, CachedConfiguration.getInstance(), aconf);
 
-    Reader reader = new Reader(_cbr, RFile.RINDEX_VER_7);
+    Reader reader = new Reader(_cbr, RFile.RINDEX_VER_8);
     BlockRead rootIn = _cbr.getMetaBlock("root");
     reader.readFields(rootIn);
     rootIn.close();

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java b/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java
index 2e2b346..ab98f49 100644
--- a/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java
@@ -28,16 +28,21 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map.Entry;
 import java.util.Random;
 import java.util.Set;
 
 import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.ConfigurationCopy;
 import org.apache.accumulo.core.conf.Property;
@@ -57,6 +62,10 @@ import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
 import org.apache.accumulo.core.metadata.MetadataTable;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
+import org.apache.accumulo.core.sample.RowSampler;
+import org.apache.accumulo.core.sample.Sampler;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
+import org.apache.accumulo.core.sample.impl.SamplerFactory;
 import org.apache.accumulo.core.security.crypto.CryptoTest;
 import org.apache.accumulo.core.util.CachedConfiguration;
 import org.apache.hadoop.conf.Configuration;
@@ -68,14 +77,37 @@ import org.apache.hadoop.fs.Seekable;
 import org.apache.hadoop.io.Text;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
+import org.junit.Assert;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 
+import com.google.common.hash.HashCode;
+import com.google.common.hash.Hasher;
+import com.google.common.hash.Hashing;
 import com.google.common.primitives.Bytes;
 
 public class RFileTest {
 
+  public static class SampleIE extends BaseIteratorEnvironment {
+
+    private SamplerConfiguration samplerConfig;
+
+    SampleIE(SamplerConfiguration config) {
+      this.samplerConfig = config;
+    }
+
+    @Override
+    public boolean isSamplingEnabled() {
+      return samplerConfig != null;
+    }
+
+    @Override
+    public SamplerConfiguration getSamplerConfiguration() {
+      return samplerConfig;
+    }
+  }
+
   private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<ByteSequence>();
 
   @Rule
@@ -193,7 +225,15 @@ public class RFileTest {
       baos = new ByteArrayOutputStream();
       dos = new FSDataOutputStream(baos, new FileSystem.Statistics("a"));
       CachableBlockFile.Writer _cbw = new CachableBlockFile.Writer(dos, "gz", conf, accumuloConfiguration);
-      writer = new RFile.Writer(_cbw, blockSize, 1000);
+
+      SamplerConfigurationImpl samplerConfig = SamplerConfigurationImpl.newSamplerConfig(accumuloConfiguration);
+      Sampler sampler = null;
+
+      if (samplerConfig != null) {
+        sampler = SamplerFactory.newSampler(samplerConfig, accumuloConfiguration);
+      }
+
+      writer = new RFile.Writer(_cbw, blockSize, 1000, samplerConfig, sampler);
 
       if (startDLG)
         writer.startDefaultLocalityGroup();
@@ -221,7 +261,6 @@ public class RFileTest {
     }
 
     public void openReader(boolean cfsi) throws IOException {
-
       int fileLength = 0;
       byte[] data = null;
       data = baos.toByteArray();
@@ -1206,7 +1245,6 @@ public class RFileTest {
   @Test
   public void test14() throws IOException {
     // test starting locality group after default locality group was started
-
     TestRFile trf = new TestRFile(conf);
 
     trf.openWriter(false);
@@ -1558,6 +1596,7 @@ public class RFileTest {
     runVersionTest(3);
     runVersionTest(4);
     runVersionTest(6);
+    runVersionTest(7);
   }
 
   private void runVersionTest(int version) throws IOException {
@@ -1762,6 +1801,294 @@ public class RFileTest {
     conf = null;
   }
 
+  private Key nk(int r, int c) {
+    String row = String.format("r%06d", r);
+    switch (c) {
+      case 0:
+        return new Key(row, "user", "addr");
+      case 1:
+        return new Key(row, "user", "name");
+      default:
+        throw new IllegalArgumentException();
+    }
+  }
+
+  private Value nv(int r, int c) {
+    switch (c) {
+      case 0:
+        return new Value(("123" + r + " west st").getBytes());
+      case 1:
+        return new Value(("bob" + r).getBytes());
+      default:
+        throw new IllegalArgumentException();
+    }
+  }
+
+  private static void hash(Hasher hasher, Key key, Value val) {
+    hasher.putBytes(key.getRowData().toArray());
+    hasher.putBytes(key.getColumnFamilyData().toArray());
+    hasher.putBytes(key.getColumnQualifierData().toArray());
+    hasher.putBytes(key.getColumnVisibilityData().toArray());
+    hasher.putLong(key.getTimestamp());
+    hasher.putBoolean(key.isDeleted());
+    hasher.putBytes(val.get());
+  }
+
+  private static void add(TestRFile trf, Key key, Value val, Hasher dataHasher, List<Entry<Key,Value>> sample, Sampler sampler) throws IOException {
+    if (sampler.accept(key)) {
+      sample.add(new AbstractMap.SimpleImmutableEntry<Key,Value>(key, val));
+    }
+
+    hash(dataHasher, key, val);
+
+    trf.writer.append(key, val);
+  }
+
+  private List<Entry<Key,Value>> toList(SortedKeyValueIterator<Key,Value> sample) throws IOException {
+    ArrayList<Entry<Key,Value>> ret = new ArrayList<>();
+
+    while (sample.hasTop()) {
+      ret.add(new AbstractMap.SimpleImmutableEntry<Key,Value>(new Key(sample.getTopKey()), new Value(sample.getTopValue())));
+      sample.next();
+    }
+
+    return ret;
+  }
+
+  private void checkSample(SortedKeyValueIterator<Key,Value> sample, List<Entry<Key,Value>> sampleData) throws IOException {
+    checkSample(sample, sampleData, EMPTY_COL_FAMS, false);
+  }
+
+  private void checkSample(SortedKeyValueIterator<Key,Value> sample, List<Entry<Key,Value>> sampleData, Collection<ByteSequence> columnFamilies,
+      boolean inclusive) throws IOException {
+
+    sample.seek(new Range(), columnFamilies, inclusive);
+    Assert.assertEquals(sampleData, toList(sample));
+
+    Random rand = new Random();
+    long seed = rand.nextLong();
+    rand = new Random(seed);
+
+    // randomly seek sample iterator and verify
+    for (int i = 0; i < 33; i++) {
+      Key startKey = null;
+      boolean startInclusive = false;
+      int startIndex = 0;
+
+      Key endKey = null;
+      boolean endInclusive = false;
+      int endIndex = sampleData.size();
+
+      if (rand.nextBoolean()) {
+        startIndex = rand.nextInt(sampleData.size());
+        startKey = sampleData.get(startIndex).getKey();
+        startInclusive = rand.nextBoolean();
+        if (!startInclusive) {
+          startIndex++;
+        }
+      }
+
+      if (startIndex < endIndex && rand.nextBoolean()) {
+        endIndex -= rand.nextInt(endIndex - startIndex);
+        endKey = sampleData.get(endIndex - 1).getKey();
+        endInclusive = rand.nextBoolean();
+        if (!endInclusive) {
+          endIndex--;
+        }
+      } else if (startIndex == endIndex) {
+        endInclusive = rand.nextBoolean();
+      }
+
+      sample.seek(new Range(startKey, startInclusive, endKey, endInclusive), columnFamilies, inclusive);
+      Assert.assertEquals("seed: " + seed, sampleData.subList(startIndex, endIndex), toList(sample));
+    }
+  }
+
+  @Test
+  public void testSample() throws IOException {
+
+    int num = 10000;
+
+    for (int sampleBufferSize : new int[] {1 << 10, 1 << 20}) {
+      // force sample buffer to flush for smaller data
+      RFile.setSampleBufferSize(sampleBufferSize);
+
+      for (int modulus : new int[] {19, 103, 1019}) {
+        Hasher dataHasher = Hashing.md5().newHasher();
+        List<Entry<Key,Value>> sampleData = new ArrayList<Entry<Key,Value>>();
+
+        ConfigurationCopy sampleConf = new ConfigurationCopy(conf == null ? AccumuloConfiguration.getDefaultConfiguration() : conf);
+        sampleConf.set(Property.TABLE_SAMPLER, RowSampler.class.getName());
+        sampleConf.set(Property.TABLE_SAMPLER_OPTS + "hasher", "murmur3_32");
+        sampleConf.set(Property.TABLE_SAMPLER_OPTS + "modulus", modulus + "");
+
+        Sampler sampler = SamplerFactory.newSampler(SamplerConfigurationImpl.newSamplerConfig(sampleConf), sampleConf);
+
+        TestRFile trf = new TestRFile(sampleConf);
+
+        trf.openWriter();
+
+        for (int i = 0; i < num; i++) {
+          add(trf, nk(i, 0), nv(i, 0), dataHasher, sampleData, sampler);
+          add(trf, nk(i, 1), nv(i, 1), dataHasher, sampleData, sampler);
+        }
+
+        HashCode expectedDataHash = dataHasher.hash();
+
+        trf.closeWriter();
+
+        trf.openReader();
+
+        FileSKVIterator sample = trf.reader.getSample(SamplerConfigurationImpl.newSamplerConfig(sampleConf));
+
+        checkSample(sample, sampleData);
+
+        Assert.assertEquals(expectedDataHash, hash(trf.reader));
+
+        SampleIE ie = new SampleIE(SamplerConfigurationImpl.newSamplerConfig(sampleConf).toSamplerConfiguration());
+
+        for (int i = 0; i < 3; i++) {
+          // test opening and closing deep copies a few times.
+          trf.reader.closeDeepCopies();
+
+          sample = trf.reader.getSample(SamplerConfigurationImpl.newSamplerConfig(sampleConf));
+          SortedKeyValueIterator<Key,Value> sampleDC1 = sample.deepCopy(ie);
+          SortedKeyValueIterator<Key,Value> sampleDC2 = sample.deepCopy(ie);
+          SortedKeyValueIterator<Key,Value> sampleDC3 = trf.reader.deepCopy(ie);
+          SortedKeyValueIterator<Key,Value> allDC1 = sampleDC1.deepCopy(new SampleIE(null));
+          SortedKeyValueIterator<Key,Value> allDC2 = sample.deepCopy(new SampleIE(null));
+
+          Assert.assertEquals(expectedDataHash, hash(allDC1));
+          Assert.assertEquals(expectedDataHash, hash(allDC2));
+
+          checkSample(sample, sampleData);
+          checkSample(sampleDC1, sampleData);
+          checkSample(sampleDC2, sampleData);
+          checkSample(sampleDC3, sampleData);
+        }
+
+        trf.reader.closeDeepCopies();
+
+        trf.closeReader();
+      }
+    }
+  }
+
+  private HashCode hash(SortedKeyValueIterator<Key,Value> iter) throws IOException {
+    Hasher dataHasher = Hashing.md5().newHasher();
+    iter.seek(new Range(), EMPTY_COL_FAMS, false);
+    while (iter.hasTop()) {
+      hash(dataHasher, iter.getTopKey(), iter.getTopValue());
+      iter.next();
+    }
+
+    return dataHasher.hash();
+  }
+
+  @Test
+  public void testSampleLG() throws IOException {
+
+    int num = 5000;
+
+    for (int sampleBufferSize : new int[] {1 << 10, 1 << 20}) {
+      // force sample buffer to flush for smaller data
+      RFile.setSampleBufferSize(sampleBufferSize);
+
+      for (int modulus : new int[] {19, 103, 1019}) {
+        List<Entry<Key,Value>> sampleDataLG1 = new ArrayList<Entry<Key,Value>>();
+        List<Entry<Key,Value>> sampleDataLG2 = new ArrayList<Entry<Key,Value>>();
+
+        ConfigurationCopy sampleConf = new ConfigurationCopy(conf == null ? AccumuloConfiguration.getDefaultConfiguration() : conf);
+        sampleConf.set(Property.TABLE_SAMPLER, RowSampler.class.getName());
+        sampleConf.set(Property.TABLE_SAMPLER_OPTS + "hasher", "murmur3_32");
+        sampleConf.set(Property.TABLE_SAMPLER_OPTS + "modulus", modulus + "");
+
+        Sampler sampler = SamplerFactory.newSampler(SamplerConfigurationImpl.newSamplerConfig(sampleConf), sampleConf);
+
+        TestRFile trf = new TestRFile(sampleConf);
+
+        trf.openWriter(false, 1000);
+
+        trf.writer.startNewLocalityGroup("meta-lg", ncfs("metaA", "metaB"));
+        for (int r = 0; r < num; r++) {
+          String row = String.format("r%06d", r);
+          Key k1 = new Key(row, "metaA", "q9", 7);
+          Key k2 = new Key(row, "metaB", "q8", 7);
+          Key k3 = new Key(row, "metaB", "qA", 7);
+
+          Value v1 = new Value(("" + r).getBytes());
+          Value v2 = new Value(("" + r * 93).getBytes());
+          Value v3 = new Value(("" + r * 113).getBytes());
+
+          if (sampler.accept(k1)) {
+            sampleDataLG1.add(new AbstractMap.SimpleImmutableEntry<Key,Value>(k1, v1));
+            sampleDataLG1.add(new AbstractMap.SimpleImmutableEntry<Key,Value>(k2, v2));
+            sampleDataLG1.add(new AbstractMap.SimpleImmutableEntry<Key,Value>(k3, v3));
+          }
+
+          trf.writer.append(k1, v1);
+          trf.writer.append(k2, v2);
+          trf.writer.append(k3, v3);
+        }
+
+        trf.writer.startDefaultLocalityGroup();
+
+        for (int r = 0; r < num; r++) {
+          String row = String.format("r%06d", r);
+          Key k1 = new Key(row, "dataA", "q9", 7);
+
+          Value v1 = new Value(("" + r).getBytes());
+
+          if (sampler.accept(k1)) {
+            sampleDataLG2.add(new AbstractMap.SimpleImmutableEntry<Key,Value>(k1, v1));
+          }
+
+          trf.writer.append(k1, v1);
+        }
+
+        trf.closeWriter();
+
+        Assert.assertTrue(sampleDataLG1.size() > 0);
+        Assert.assertTrue(sampleDataLG2.size() > 0);
+
+        trf.openReader(false);
+        FileSKVIterator sample = trf.reader.getSample(SamplerConfigurationImpl.newSamplerConfig(sampleConf));
+
+        checkSample(sample, sampleDataLG1, ncfs("metaA", "metaB"), true);
+        checkSample(sample, sampleDataLG1, ncfs("metaA"), true);
+        checkSample(sample, sampleDataLG1, ncfs("metaB"), true);
+        checkSample(sample, sampleDataLG1, ncfs("dataA"), false);
+
+        checkSample(sample, sampleDataLG2, ncfs("metaA", "metaB"), false);
+        checkSample(sample, sampleDataLG2, ncfs("dataA"), true);
+
+        ArrayList<Entry<Key,Value>> allSampleData = new ArrayList<Entry<Key,Value>>();
+        allSampleData.addAll(sampleDataLG1);
+        allSampleData.addAll(sampleDataLG2);
+
+        Collections.sort(allSampleData, new Comparator<Entry<Key,Value>>() {
+          @Override
+          public int compare(Entry<Key,Value> o1, Entry<Key,Value> o2) {
+            return o1.getKey().compareTo(o2.getKey());
+          }
+        });
+
+        checkSample(sample, allSampleData, ncfs("dataA", "metaA"), true);
+        checkSample(sample, allSampleData, EMPTY_COL_FAMS, false);
+
+        trf.closeReader();
+      }
+    }
+  }
+
+  @Test
+  public void testEncSample() throws IOException {
+    conf = setAndGetAccumuloConfig(CryptoTest.CRYPTO_ON_CONF);
+    testSample();
+    testSampleLG();
+    conf = null;
+  }
+
   @Test
   public void testCryptoDoesntLeakSensitive() throws IOException {
     conf = setAndGetAccumuloConfig(CryptoTest.CRYPTO_ON_CONF);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/iterators/DefaultIteratorEnvironment.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/iterators/DefaultIteratorEnvironment.java b/core/src/test/java/org/apache/accumulo/core/iterators/DefaultIteratorEnvironment.java
index 316823c..3c68196 100644
--- a/core/src/test/java/org/apache/accumulo/core/iterators/DefaultIteratorEnvironment.java
+++ b/core/src/test/java/org/apache/accumulo/core/iterators/DefaultIteratorEnvironment.java
@@ -18,17 +18,16 @@ package org.apache.accumulo.core.iterators;
 
 import java.io.IOException;
 
+import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
 import org.apache.accumulo.core.iterators.system.MapFileIterator;
-import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.CachedConfiguration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 
-public class DefaultIteratorEnvironment implements IteratorEnvironment {
+public class DefaultIteratorEnvironment extends BaseIteratorEnvironment {
 
   AccumuloConfiguration conf;
 
@@ -53,23 +52,7 @@ public class DefaultIteratorEnvironment implements IteratorEnvironment {
   }
 
   @Override
-  public IteratorScope getIteratorScope() {
-    throw new UnsupportedOperationException();
+  public boolean isSamplingEnabled() {
+    return false;
   }
-
-  @Override
-  public boolean isFullMajorCompaction() {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void registerSideChannel(SortedKeyValueIterator<Key,Value> iter) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public Authorizations getAuthorizations() {
-    throw new UnsupportedOperationException();
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/iterators/FirstEntryInRowIteratorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/iterators/FirstEntryInRowIteratorTest.java b/core/src/test/java/org/apache/accumulo/core/iterators/FirstEntryInRowIteratorTest.java
index 74f7462..5455aa6 100644
--- a/core/src/test/java/org/apache/accumulo/core/iterators/FirstEntryInRowIteratorTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/iterators/FirstEntryInRowIteratorTest.java
@@ -22,14 +22,12 @@ import java.io.IOException;
 import java.util.Collections;
 import java.util.TreeMap;
 
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.PartialKey;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
 import org.apache.accumulo.core.iterators.system.CountingIterator;
-import org.apache.accumulo.core.security.Authorizations;
 import org.junit.Test;
 
 public class FirstEntryInRowIteratorTest {
@@ -39,38 +37,7 @@ public class FirstEntryInRowIteratorTest {
     org.apache.accumulo.core.iterators.SortedMapIterator source = new SortedMapIterator(sourceMap);
     CountingIterator counter = new CountingIterator(source);
     FirstEntryInRowIterator feiri = new FirstEntryInRowIterator();
-    IteratorEnvironment env = new IteratorEnvironment() {
-
-      @Override
-      public AccumuloConfiguration getConfig() {
-        return null;
-      }
-
-      @Override
-      public IteratorScope getIteratorScope() {
-        return null;
-      }
-
-      @Override
-      public boolean isFullMajorCompaction() {
-        return false;
-      }
-
-      @Override
-      public void registerSideChannel(SortedKeyValueIterator<Key,Value> arg0) {
-
-      }
-
-      @Override
-      public Authorizations getAuthorizations() {
-        return null;
-      }
-
-      @Override
-      public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String arg0) throws IOException {
-        return null;
-      }
-    };
+    IteratorEnvironment env = new BaseIteratorEnvironment();
 
     feiri.init(counter, Collections.singletonMap(FirstEntryInRowIterator.NUM_SCANS_STRING_NAME, Integer.toString(numScans)), env);
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/iterators/SortedMapIteratorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/iterators/SortedMapIteratorTest.java b/core/src/test/java/org/apache/accumulo/core/iterators/SortedMapIteratorTest.java
new file mode 100644
index 0000000..7557b9a
--- /dev/null
+++ b/core/src/test/java/org/apache/accumulo/core/iterators/SortedMapIteratorTest.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.iterators;
+
+import java.util.TreeMap;
+
+import org.apache.accumulo.core.client.SampleNotPresentException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.sample.RowSampler;
+import org.junit.Test;
+
+public class SortedMapIteratorTest {
+
+  @Test(expected = SampleNotPresentException.class)
+  public void testSampleNotPresent() {
+    SortedMapIterator smi = new SortedMapIterator(new TreeMap<Key,Value>());
+    smi.deepCopy(new BaseIteratorEnvironment() {
+      @Override
+      public boolean isSamplingEnabled() {
+        return true;
+      }
+
+      @Override
+      public SamplerConfiguration getSamplerConfiguration() {
+        return new SamplerConfiguration(RowSampler.class.getName());
+      }
+    });
+  }
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/iterators/user/RowDeletingIteratorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/iterators/user/RowDeletingIteratorTest.java b/core/src/test/java/org/apache/accumulo/core/iterators/user/RowDeletingIteratorTest.java
index a3c1cca..bdaf112 100644
--- a/core/src/test/java/org/apache/accumulo/core/iterators/user/RowDeletingIteratorTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/iterators/user/RowDeletingIteratorTest.java
@@ -16,30 +16,26 @@
  */
 package org.apache.accumulo.core.iterators.user;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.TreeMap;
 
-import junit.framework.TestCase;
-
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment;
 import org.apache.accumulo.core.data.ArrayByteSequence;
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
-import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.SortedMapIterator;
 import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
-import org.apache.accumulo.core.security.Authorizations;
 import org.apache.hadoop.io.Text;
 
+import junit.framework.TestCase;
+
 public class RowDeletingIteratorTest extends TestCase {
 
-  public static class TestIE implements IteratorEnvironment {
+  public static class TestIE extends BaseIteratorEnvironment {
 
     private IteratorScope scope;
     private boolean fmc;
@@ -50,11 +46,6 @@ public class RowDeletingIteratorTest extends TestCase {
     }
 
     @Override
-    public AccumuloConfiguration getConfig() {
-      return null;
-    }
-
-    @Override
     public IteratorScope getIteratorScope() {
       return scope;
     }
@@ -63,19 +54,6 @@ public class RowDeletingIteratorTest extends TestCase {
     public boolean isFullMajorCompaction() {
       return fmc;
     }
-
-    @Override
-    public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
-      return null;
-    }
-
-    @Override
-    public void registerSideChannel(SortedKeyValueIterator<Key,Value> iter) {}
-
-    @Override
-    public Authorizations getAuthorizations() {
-      return null;
-    }
   }
 
   Key nk(String row, String cf, String cq, long time) {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/iterators/user/RowEncodingIteratorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/iterators/user/RowEncodingIteratorTest.java b/core/src/test/java/org/apache/accumulo/core/iterators/user/RowEncodingIteratorTest.java
index 8f228f5..d9aa174 100644
--- a/core/src/test/java/org/apache/accumulo/core/iterators/user/RowEncodingIteratorTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/iterators/user/RowEncodingIteratorTest.java
@@ -16,26 +16,15 @@
  */
 package org.apache.accumulo.core.iterators.user;
 
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
-import org.apache.accumulo.core.data.ByteSequence;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.IteratorEnvironment;
-import org.apache.accumulo.core.iterators.IteratorUtil;
-import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
-import org.apache.accumulo.core.iterators.SortedMapIterator;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.commons.collections.BufferOverflowException;
-import org.apache.hadoop.io.Text;
-import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
-
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -43,23 +32,20 @@ import java.util.Map;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorUtil;
+import org.apache.accumulo.core.iterators.SortedMapIterator;
+import org.apache.commons.collections.BufferOverflowException;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
 
 public class RowEncodingIteratorTest {
 
-  private static final class DummyIteratorEnv implements IteratorEnvironment {
-    @Override
-    public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
-      return null;
-    }
-
-    @Override
-    public AccumuloConfiguration getConfig() {
-      return null;
-    }
-
+  private static final class DummyIteratorEnv extends BaseIteratorEnvironment {
     @Override
     public IteratorUtil.IteratorScope getIteratorScope() {
       return IteratorUtil.IteratorScope.scan;
@@ -69,16 +55,6 @@ public class RowEncodingIteratorTest {
     public boolean isFullMajorCompaction() {
       return false;
     }
-
-    @Override
-    public void registerSideChannel(SortedKeyValueIterator<Key,Value> iter) {
-
-    }
-
-    @Override
-    public Authorizations getAuthorizations() {
-      return null;
-    }
   }
 
   private static final class RowEncodingIteratorImpl extends RowEncodingIterator {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/java/org/apache/accumulo/core/iterators/user/TransformingIteratorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/iterators/user/TransformingIteratorTest.java b/core/src/test/java/org/apache/accumulo/core/iterators/user/TransformingIteratorTest.java
index 1f4d6e7..97ebe5c 100644
--- a/core/src/test/java/org/apache/accumulo/core/iterators/user/TransformingIteratorTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/iterators/user/TransformingIteratorTest.java
@@ -34,7 +34,7 @@ import java.util.SortedMap;
 import java.util.TreeMap;
 
 import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment;
 import org.apache.accumulo.core.data.ArrayByteSequence;
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
@@ -599,7 +599,7 @@ public class TransformingIteratorTest {
   public static class ColFamReversingCompactionKeyTransformingIterator extends ColFamReversingKeyTransformingIterator {
     @Override
     public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
-      env = new MajCIteratorEnvironmentAdapter(env);
+      env = new MajCIteratorEnvironmentAdapter();
       super.init(source, options, env);
     }
   }
@@ -639,7 +639,7 @@ public class TransformingIteratorTest {
   public static class IllegalVisCompactionKeyTransformingIterator extends IllegalVisKeyTransformingIterator {
     @Override
     public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
-      env = new MajCIteratorEnvironmentAdapter(env);
+      env = new MajCIteratorEnvironmentAdapter();
       super.init(source, options, env);
     }
   }
@@ -665,7 +665,7 @@ public class TransformingIteratorTest {
   public static class BadVisCompactionKeyTransformingIterator extends BadVisKeyTransformingIterator {
     @Override
     public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
-      env = new MajCIteratorEnvironmentAdapter(env);
+      env = new MajCIteratorEnvironmentAdapter();
       super.init(source, options, env);
     }
   }
@@ -711,41 +711,10 @@ public class TransformingIteratorTest {
     }
   }
 
-  private static class MajCIteratorEnvironmentAdapter implements IteratorEnvironment {
-    private IteratorEnvironment delegate;
-
-    public MajCIteratorEnvironmentAdapter(IteratorEnvironment delegate) {
-      this.delegate = delegate;
-    }
-
-    @Override
-    public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
-      return delegate.reserveMapFileReader(mapFileName);
-    }
-
-    @Override
-    public AccumuloConfiguration getConfig() {
-      return delegate.getConfig();
-    }
-
+  private static class MajCIteratorEnvironmentAdapter extends BaseIteratorEnvironment {
     @Override
     public IteratorScope getIteratorScope() {
       return IteratorScope.majc;
     }
-
-    @Override
-    public boolean isFullMajorCompaction() {
-      return delegate.isFullMajorCompaction();
-    }
-
-    @Override
-    public void registerSideChannel(SortedKeyValueIterator<Key,Value> iter) {
-      delegate.registerSideChannel(iter);
-    }
-
-    @Override
-    public Authorizations getAuthorizations() {
-      return null;
-    }
   }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/test/resources/org/apache/accumulo/core/file/rfile/ver_7.rf
----------------------------------------------------------------------
diff --git a/core/src/test/resources/org/apache/accumulo/core/file/rfile/ver_7.rf b/core/src/test/resources/org/apache/accumulo/core/file/rfile/ver_7.rf
new file mode 100644
index 0000000..7d2c9f7
Binary files /dev/null and b/core/src/test/resources/org/apache/accumulo/core/file/rfile/ver_7.rf differ

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/docs/src/main/asciidoc/accumulo_user_manual.asciidoc
----------------------------------------------------------------------
diff --git a/docs/src/main/asciidoc/accumulo_user_manual.asciidoc b/docs/src/main/asciidoc/accumulo_user_manual.asciidoc
index 32f19fe..b62983a 100644
--- a/docs/src/main/asciidoc/accumulo_user_manual.asciidoc
+++ b/docs/src/main/asciidoc/accumulo_user_manual.asciidoc
@@ -59,6 +59,8 @@ include::chapters/ssl.txt[]
 
 include::chapters/kerberos.txt[]
 
+include::chapters/sampling.txt[]
+
 include::chapters/administration.txt[]
 
 include::chapters/multivolume.txt[]

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/docs/src/main/asciidoc/chapters/sampling.txt
----------------------------------------------------------------------
diff --git a/docs/src/main/asciidoc/chapters/sampling.txt b/docs/src/main/asciidoc/chapters/sampling.txt
new file mode 100644
index 0000000..f035c56
--- /dev/null
+++ b/docs/src/main/asciidoc/chapters/sampling.txt
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+== Sampling
+
+=== Overview
+
+Accumulo has the ability to generate and scan a per-table set of sample data.
+This sample data is kept up to date as a table is mutated.  Which key values
+are placed in the sample data is configurable per table.
+
+This feature can be used for query estimation and optimization.  As an example
+of estimation, assume an Accumulo table is configured to generate a sample
+containing one millionth of a table's data.  If a query is executed against the
+sample and returns one thousand results, then the same query against all the
+data would probably return a billion results.  A nice property of having
+Accumulo generate the sample is that it is always up to date, so estimations
+will be accurate even when querying the most recently written data.
+
+An example of a query optimization is an iterator that uses sample data to
+get an estimate and then makes decisions based on that estimate.
+
+=== Configuring
+
+In order to use sampling, an Accumulo table must be configured with a class
+that implements +org.apache.accumulo.core.sample.Sampler+ along with options
+for that class.  For guidance on implementing a Sampler, see that interface's
+javadoc.  Accumulo provides a few implementations out of the box.  For
+information on how to use the samplers that ship with Accumulo, look in the
+package +org.apache.accumulo.core.sample+ and consult the javadoc of the
+classes there.  See +README.sample+ and +SampleExample.java+ for examples of
+how to configure a Sampler on a table.
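+
+As a brief sketch of what this looks like through the API (the table name is
+a placeholder; the calls mirror those in +SampleExample.java+):
+
+[source,java]
+----
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.sample.RowSampler;
+
+import com.google.common.collect.ImmutableMap;
+
+// sample rows where murmur3_32(row) % 3 == 0; conn is an existing Connector
+SamplerConfiguration sc = new SamplerConfiguration(RowSampler.class.getName());
+sc.setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "3"));
+conn.tableOperations().setSamplerConfiguration("mytable", sc);
+----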
+
+Once a table is configured with a sampler, all writes after that point will
+generate sample data.  Sample data will not be present for data written before
+sampling was configured.  A compaction can be initiated that only compacts the
+files in the table that do not have sample data.  +README.sample+ shows how to
+do this.
+
+If the sampling configuration of a table is changed, then Accumulo will start
+generating new sample data with the new configuration.  However, old data will
+still have sample data generated with the previous configuration.  A selective
+compaction can also be issued in this case to regenerate the sample data.
+
+=== Scanning sample data
+
+In order to scan sample data, use the +setSamplerConfiguration(...)+ method on
++Scanner+ or +BatchScanner+.  Please consult this method's javadoc for more
+information.
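+
+For example, assuming a +Scanner+ named +scanner+ and the sampler
+configuration +sc+ from the previous section, the sketch below scans only the
+sample:
+
+[source,java]
+----
+// restrict the scan to sample data; a SampleNotPresentException is thrown if
+// the sample is missing or was built with a different configuration
+scanner.setSamplerConfiguration(sc);
+for (Entry<Key,Value> entry : scanner)
+  System.out.println(entry.getKey() + " " + entry.getValue());
+----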
+
+Sample data can also be scanned from within an Accumulo
++SortedKeyValueIterator+.  To see how to do this, look at the example iterator
+referenced in +README.sample+.  Also, consult the javadoc on
++org.apache.accumulo.core.iterators.IteratorEnvironment.cloneWithSamplingEnabled()+.
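+
+Within an iterator, the pattern used by the example iterator looks roughly
+like this:
+
+[source,java]
+----
+// in init(): deep copy the source against a sampling-enabled environment so
+// the copy reads sample data while the original reads all data
+IteratorEnvironment sampleEnv = env.cloneWithSamplingEnabled();
+SortedKeyValueIterator<Key,Value> sampleSource = source.deepCopy(sampleEnv);
+----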
+
+MapReduce jobs using the +AccumuloInputFormat+ can also read sample data.  See
+the javadoc for the +setSamplerConfiguration()+ method on
++AccumuloInputFormat+.
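+
+A minimal sketch, assuming a Hadoop +Job+ named +job+ (consult the javadoc
+for the exact signature):
+
+[source,java]
+----
+// request that input splits read sample data instead of all table data
+AccumuloInputFormat.setSamplerConfiguration(job, sc);
+----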
+
+Scans over sample data will throw a +SampleNotPresentException+ in the
+following cases:
+
+. sample data is not present
+. sample data is present, but was generated with multiple configurations
+. sample data is only partially present
+
+Therefore, a scan over sample data can only succeed if all data written has
+sample data generated with the same configuration.
+
+=== Bulk import
+
+When generating rfiles to bulk import into Accumulo, those rfiles can contain
+sample data.  To use this feature, look at the javadoc on the
++AccumuloFileOutputFormat.setSampler(...)+ method.
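+
+For example (again a sketch, assuming a Hadoop +Job+ named +job+; see the
+javadoc for the exact signature):
+
+[source,java]
+----
+// rfiles written by this job will also contain sample data
+AccumuloFileOutputFormat.setSampler(job, sc);
+----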
+

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/docs/src/main/resources/examples/README
----------------------------------------------------------------------
diff --git a/docs/src/main/resources/examples/README b/docs/src/main/resources/examples/README
index 4211050..03c2e05 100644
--- a/docs/src/main/resources/examples/README
+++ b/docs/src/main/resources/examples/README
@@ -80,6 +80,8 @@ features of Apache Accumulo.
    README.rowhash:     Using MapReduce to read a table and write to a new
                        column in the same table.
 
+   README.sample:      Building and using sample data in Accumulo.
+
    README.shard:       Using the intersecting iterator with a term index
                        partitioned by document.
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/docs/src/main/resources/examples/README.sample
----------------------------------------------------------------------
diff --git a/docs/src/main/resources/examples/README.sample b/docs/src/main/resources/examples/README.sample
new file mode 100644
index 0000000..15288aa
--- /dev/null
+++ b/docs/src/main/resources/examples/README.sample
@@ -0,0 +1,192 @@
+Title: Apache Accumulo Sampling Example
+Notice:    Licensed to the Apache Software Foundation (ASF) under one
+           or more contributor license agreements.  See the NOTICE file
+           distributed with this work for additional information
+           regarding copyright ownership.  The ASF licenses this file
+           to you under the Apache License, Version 2.0 (the
+           "License"); you may not use this file except in compliance
+           with the License.  You may obtain a copy of the License at
+           .
+             http://www.apache.org/licenses/LICENSE-2.0
+           .
+           Unless required by applicable law or agreed to in writing,
+           software distributed under the License is distributed on an
+           "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+           KIND, either express or implied.  See the License for the
+           specific language governing permissions and limitations
+           under the License.
+
+
+Basic Sampling Example
+----------------------
+
+Accumulo supports building a set of sample data that can be efficiently
+accessed by scanners.  Which data is included in the sample set is
+configurable.  Below, some data representing documents is inserted.
+
+    root@instance sampex> createtable sampex
+    root@instance sampex> insert 9255 doc content 'abcde'
+    root@instance sampex> insert 9255 doc url file://foo.txt
+    root@instance sampex> insert 8934 doc content 'accumulo scales'
+    root@instance sampex> insert 8934 doc url file://accumulo_notes.txt
+    root@instance sampex> insert 2317 doc content 'milk, eggs, bread, parmigiano-reggiano'
+    root@instance sampex> insert 2317 doc url file://groceries/9.txt
+    root@instance sampex> insert 3900 doc content 'EC2 ate my homework'
+    root@instance sampex> insert 3900 doc url file://final_project.txt
+
+Below, the table sampex is configured to build a sample set.  The
+configuration causes Accumulo to include any row where
+`murmur3_32(row) % 3 == 0` in the table's sample data.
+
+    root@instance sampex> config -t sampex -s table.sampler.opt.hasher=murmur3_32
+    root@instance sampex> config -t sampex -s table.sampler.opt.modulus=3
+    root@instance sampex> config -t sampex -s table.sampler=org.apache.accumulo.core.sample.RowSampler
+
+Below, attempting to scan the sample returns an error.  This is because data
+was inserted before the sample set was configured.
+
+    root@instance sampex> scan --sample
+    2015-09-09 12:21:50,643 [shell.Shell] ERROR: org.apache.accumulo.core.client.SampleNotPresentException: Table sampex(ID:2) does not have sampling configured or built
+
+To remedy this problem, the following command will flush in-memory data and
+compact any files that do not contain the correct sample data.
+
+    root@instance sampex> compact -t sampex --sf-no-sample
+
+After the compaction, the sample scan works.  
+
+    root@instance sampex> scan --sample
+    2317 doc:content []    milk, eggs, bread, parmigiano-reggiano
+    2317 doc:url []    file://groceries/9.txt
+
+The commands below show that updates to data in the sample are seen when
+scanning the sample.
+
+    root@instance sampex> insert 2317 doc content 'milk, eggs, bread, parmigiano-reggiano, butter'
+    root@instance sampex> scan --sample
+    2317 doc:content []    milk, eggs, bread, parmigiano-reggiano, butter
+    2317 doc:url []    file://groceries/9.txt
+
+In order to make scanning the sample fast, sample data is partitioned as data
+is written to Accumulo.  This means that if the sample configuration is
+changed, data written previously is partitioned using different criteria.
+Accumulo will detect this situation and fail sample scans.  The commands below
+show this failure and fixing the problem with a compaction.
+
+    root@instance sampex> config -t sampex -s table.sampler.opt.modulus=2
+    root@instance sampex> scan --sample
+    2015-09-09 12:22:51,058 [shell.Shell] ERROR: org.apache.accumulo.core.client.SampleNotPresentException: Table sampex(ID:2) does not have sampling configured or built
+    root@instance sampex> compact -t sampex --sf-no-sample
+    2015-09-09 12:23:07,242 [shell.Shell] INFO : Compaction of table sampex started for given range
+    root@instance sampex> scan --sample
+    2317 doc:content []    milk, eggs, bread, parmigiano-reggiano
+    2317 doc:url []    file://groceries/9.txt
+    3900 doc:content []    EC2 ate my homework
+    3900 doc:url []    file://final_project.txt
+    9255 doc:content []    abcde
+    9255 doc:url []    file://foo.txt
+
+The example above is replicated in a Java program using the Accumulo API.
+Below is the program name and the command to run it.
+
+    ./bin/accumulo org.apache.accumulo.examples.simple.sample.SampleExample -i instance -z localhost -u root -p secret
+
+The commands below look under the hood to give some insight into how this
+feature works.  The commands determine what files the sampex table is using.
+
+    root@instance sampex> tables -l
+    accumulo.metadata    =>        !0
+    accumulo.replication =>      +rep
+    accumulo.root        =>        +r
+    sampex               =>         2
+    trace                =>         1
+    root@instance sampex> scan -t accumulo.metadata -c file -b 2 -e 2<
+    2< file:hdfs://localhost:10000/accumulo/tables/2/default_tablet/A000000s.rf []    702,8
+
+Below shows running `accumulo rfile-info` on the file above.  The output shows
+that the rfile has a normal default locality group and a sample default
+locality group, as well as the configuration used to create the sample
+locality group.  The sample configuration within an rfile must match the
+table's sample configuration for sample scans to work.
+
+    $ ./bin/accumulo rfile-info hdfs://localhost:10000/accumulo/tables/2/default_tablet/A000000s.rf
+    Reading file: hdfs://localhost:10000/accumulo/tables/2/default_tablet/A000000s.rf
+    RFile Version            : 8
+    
+    Locality group           : <DEFAULT>
+    	Start block            : 0
+    	Num   blocks           : 1
+    	Index level 0          : 35 bytes  1 blocks
+    	First key              : 2317 doc:content [] 1437672014986 false
+    	Last key               : 9255 doc:url [] 1437672014875 false
+    	Num entries            : 8
+    	Column families        : [doc]
+    
+    Sample Configuration     :
+    	Sampler class          : org.apache.accumulo.core.sample.RowSampler
+    	Sampler options        : {hasher=murmur3_32, modulus=2}
+
+    Sample Locality group    : <DEFAULT>
+    	Start block            : 0
+    	Num   blocks           : 1
+    	Index level 0          : 36 bytes  1 blocks
+    	First key              : 2317 doc:content [] 1437672014986 false
+    	Last key               : 9255 doc:url [] 1437672014875 false
+    	Num entries            : 6
+    	Column families        : [doc]
+    
+    Meta block     : BCFile.index
+          Raw size             : 4 bytes
+          Compressed size      : 12 bytes
+          Compression type     : gz
+
+    Meta block     : RFile.index
+          Raw size             : 309 bytes
+          Compressed size      : 176 bytes
+          Compression type     : gz
+
+
+Shard Sampling Example
+----------------------
+
+`README.shard` shows how to index and search files using Accumulo.  That
+example indexes documents into a table named `shard`.  The indexing scheme used
+in that example places the document name in the column qualifier.  A useful
+sample of this indexing scheme should contain all data for any document in the
+sample.   To accomplish this, the following commands build a sample for the
+shard table based on the column qualifier.
+
+    root@instance shard> config -t shard -s table.sampler.opt.hasher=murmur3_32
+    root@instance shard> config -t shard -s table.sampler.opt.modulus=101
+    root@instance shard> config -t shard -s table.sampler.opt.qualifier=true
+    root@instance shard> config -t shard -s table.sampler=org.apache.accumulo.core.sample.RowColumnSampler
+    root@instance shard> compact -t shard --sf-no-sample -w
+    2015-07-23 15:00:09,280 [shell.Shell] INFO : Compacting table ...
+    2015-07-23 15:00:10,134 [shell.Shell] INFO : Compaction of table shard completed for given range
+
+After enabling sampling, the command below counts the number of documents in
+the sample containing the words `import` and `int`.     
+
+    $ ./bin/accumulo org.apache.accumulo.examples.simple.shard.Query --sample -i instance16 -z localhost -t shard -u root -p secret import int | fgrep '.java' | wc
+         11      11    1246
+
+The command below counts the total number of documents containing the words
+`import` and `int`.
+
+    $ ./bin/accumulo org.apache.accumulo.examples.simple.shard.Query -i instance16 -z localhost -t shard -u root -p secret import int | fgrep '.java' | wc
+       1085    1085  118175
+
+The count of 11 out of 1085 total is around what would be expected for a
+modulus of 101, since 1085 / 101 is roughly 10.7.  Querying the sample first
+provides a quick way to estimate how much data the real query will bring back.
+
+Another way sample data could be used with the shard example is with a
+specialized iterator.  In the example's source code there is an iterator named
+CutoffIntersectingIterator.  This iterator first checks how many documents are
+found in the sample data.  If too many documents are found in the sample data,
+then it returns nothing.  Otherwise, it proceeds to query the full data set.
+To experiment with this iterator, use the following command.  The
+`--sampleCutoff` option below will cause the query to return nothing if, based
+on the sample, it appears the query would return more than 1000 documents.
+
+    $ ./bin/accumulo org.apache.accumulo.examples.simple.shard.Query --sampleCutoff 1000 -i instance16 -z localhost -t shard -u root -p secret import int | fgrep '.java' | wc
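+
+For reference, the query program wires up this iterator roughly as follows
+(a sketch of the relevant lines from Query.java, where `bs` is a
+BatchScanner):
+
+    IteratorSetting ii = new IteratorSetting(20, "ii", CutoffIntersectingIterator.class);
+    CutoffIntersectingIterator.setCutoff(ii, 1000);
+    bs.addScanIterator(ii);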

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java
new file mode 100644
index 0000000..57d77b1
--- /dev/null
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.examples.simple.sample;
+
+import java.util.Collections;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.cli.BatchWriterOpts;
+import org.apache.accumulo.core.cli.ClientOnDefaultTable;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.SampleNotPresentException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.admin.CompactionConfig;
+import org.apache.accumulo.core.client.admin.CompactionStrategyConfig;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.sample.RowSampler;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.examples.simple.client.RandomBatchWriter;
+import org.apache.accumulo.examples.simple.shard.CutoffIntersectingIterator;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * A simple example of using Accumulo's sampling feature. This example does something similar to what README.sample shows using the shell. Also see
+ * {@link CutoffIntersectingIterator} and README.sample for an example of how to use sample data from within an iterator.
+ */
+public class SampleExample {
+
+  // a compaction strategy that only selects files for compaction that have no sample data, or sample data created differently than the table's current
+  // configuration
+  static final CompactionStrategyConfig NO_SAMPLE_STRATEGY = new CompactionStrategyConfig(
+      "org.apache.accumulo.tserver.compaction.strategies.ConfigurableCompactionStrategy").setOptions(Collections.singletonMap("SF_NO_SAMPLE", ""));
+
+  static class Opts extends ClientOnDefaultTable {
+    public Opts() {
+      super("sampex");
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Opts opts = new Opts();
+    BatchWriterOpts bwOpts = new BatchWriterOpts();
+    opts.parseArgs(RandomBatchWriter.class.getName(), args, bwOpts);
+
+    Connector conn = opts.getConnector();
+
+    if (!conn.tableOperations().exists(opts.getTableName())) {
+      conn.tableOperations().create(opts.getTableName());
+    } else {
+      System.out.println("Table exists, not doing anything.");
+      return;
+    }
+
+    // write some data
+    BatchWriter bw = conn.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
+    bw.addMutation(createMutation("9225", "abcde", "file://foo.txt"));
+    bw.addMutation(createMutation("8934", "accumulo scales", "file://accumulo_notes.txt"));
+    bw.addMutation(createMutation("2317", "milk, eggs, bread, parmigiano-reggiano", "file://groceries/9/txt"));
+    bw.addMutation(createMutation("3900", "EC2 ate my homework", "file://final_project.txt"));
+    bw.flush();
+
+    SamplerConfiguration sc1 = new SamplerConfiguration(RowSampler.class.getName());
+    sc1.setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "3"));
+
+    conn.tableOperations().setSamplerConfiguration(opts.getTableName(), sc1);
+
+    Scanner scanner = conn.createScanner(opts.getTableName(), Authorizations.EMPTY);
+    System.out.println("Scanning all data :");
+    print(scanner);
+    System.out.println();
+
+    System.out.println("Scanning with sampler configuration.  Data was written before sampler was set on table, scan should fail.");
+    scanner.setSamplerConfiguration(sc1);
+    try {
+      print(scanner);
+    } catch (SampleNotPresentException e) {
+      System.out.println("  Saw sample not present exception as expected.");
+    }
+    System.out.println();
+
+    // compact table to recreate sample data
+    conn.tableOperations().compact(opts.getTableName(), new CompactionConfig().setCompactionStrategy(NO_SAMPLE_STRATEGY));
+
+    System.out.println("Scanning after compaction (compaction should have created sample data) : ");
+    print(scanner);
+    System.out.println();
+
+    // update a document in the sample data
+    bw.addMutation(createMutation("2317", "milk, eggs, bread, parmigiano-reggiano, butter", "file://groceries/9/txt"));
+    bw.close();
+    System.out.println("Scanning sample after updating content for docId 2317 (should see content change in sample data) : ");
+    print(scanner);
+    System.out.println();
+
+    // change tables sampling configuration...
+    SamplerConfiguration sc2 = new SamplerConfiguration(RowSampler.class.getName());
+    sc2.setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "2"));
+    conn.tableOperations().setSamplerConfiguration(opts.getTableName(), sc2);
+    // compact table to recreate sample data using new configuration
+    conn.tableOperations().compact(opts.getTableName(), new CompactionConfig().setCompactionStrategy(NO_SAMPLE_STRATEGY));
+
+    System.out.println("Scanning with old sampler configuration.  Sample data was created using new configuration with a compaction.  Scan should fail.");
+    try {
+      // try scanning with old sampler configuration
+      print(scanner);
+    } catch (SampleNotPresentException e) {
+      System.out.println("  Saw sample not present exception as expected ");
+    }
+    System.out.println();
+
+    // update expected sampler configuration on scanner
+    scanner.setSamplerConfiguration(sc2);
+
+    System.out.println("Scanning with new sampler configuration : ");
+    print(scanner);
+    System.out.println();
+
+  }
+
+  private static void print(Scanner scanner) {
+    for (Entry<Key,Value> entry : scanner) {
+      System.out.println("  " + entry.getKey() + " " + entry.getValue());
+    }
+  }
+
+  private static Mutation createMutation(String docId, String content, String url) {
+    Mutation m = new Mutation(docId);
+    m.put("doc", "context", content);
+    m.put("doc", "url", url);
+    return m;
+  }
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java
new file mode 100644
index 0000000..133e8ae
--- /dev/null
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.examples.simple.shard;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.iterators.user.IntersectingIterator;
+import org.apache.accumulo.core.sample.RowColumnSampler;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This iterator uses a sample built from the Column Qualifier to quickly avoid intersecting iterator queries that may return too many documents.
+ */
+
+public class CutoffIntersectingIterator extends IntersectingIterator {
+
+  private IntersectingIterator sampleII;
+  private int sampleMax;
+  private boolean hasTop;
+
+  public static void setCutoff(IteratorSetting iterCfg, int cutoff) {
+    Preconditions.checkArgument(cutoff >= 0);
+    iterCfg.addOption("cutoff", cutoff + "");
+  }
+
+  @Override
+  public boolean hasTop() {
+    return hasTop && super.hasTop();
+  }
+
+  @Override
+  public void seek(Range range, Collection<ByteSequence> seekColumnFamilies, boolean inclusive) throws IOException {
+
+    sampleII.seek(range, seekColumnFamilies, inclusive);
+
+    // this check will be redone whenever iterator stack is torn down and recreated.
+    int count = 0;
+    while (count <= sampleMax && sampleII.hasTop()) {
+      sampleII.next();
+      count++;
+    }
+
+    if (count > sampleMax) {
+      // In a real application one would probably want to return a key/value indicating too much data was found. Since this check executes per tablet, some
+      // tablets may return data while others return nothing; an indication of which tablets were cut off would be useful.
+      hasTop = false;
+    } else {
+      hasTop = true;
+      super.seek(range, seekColumnFamilies, inclusive);
+    }
+  }
+
+  @Override
+  public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+    super.init(source, options, env);
+
+    IteratorEnvironment sampleEnv = env.cloneWithSamplingEnabled();
+
+    setMax(sampleEnv, options);
+
+    SortedKeyValueIterator<Key,Value> sampleDC = source.deepCopy(sampleEnv);
+    sampleII = new IntersectingIterator();
+    sampleII.init(sampleDC, options, env);
+
+  }
+
+  static void validateSamplerConfig(SamplerConfiguration sampleConfig) {
+    Preconditions.checkNotNull(sampleConfig);
+    Preconditions.checkArgument(sampleConfig.getSamplerClassName().equals(RowColumnSampler.class.getName()),
+        "Unexpected Sampler " + sampleConfig.getSamplerClassName());
+    Preconditions.checkArgument(sampleConfig.getOptions().get("qualifier").equals("true"), "Expected sample on column qualifier");
+    Preconditions.checkArgument(isNullOrFalse(sampleConfig.getOptions(), "row", "family", "visibility"), "Expected sample on column qualifier only");
+  }
+
+  private void setMax(IteratorEnvironment sampleEnv, Map<String,String> options) {
+    String cutoffValue = options.get("cutoff");
+    SamplerConfiguration sampleConfig = sampleEnv.getSamplerConfiguration();
+
+    // Ensure the sample was constructed in an expected way. If it was not, then no conclusions can be drawn from the sample.
+    Preconditions.checkNotNull(cutoffValue, "Expected cutoff option is missing");
+    validateSamplerConfig(sampleConfig);
+
+    int modulus = Integer.parseInt(sampleConfig.getOptions().get("modulus"));
+
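+    // the sample contains roughly 1/modulus of the documents, so scale the cutoff down to the number of documents expected in the sample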
+    sampleMax = Math.round(Float.parseFloat(cutoffValue) / modulus);
+  }
+
+  private static boolean isNullOrFalse(Map<String,String> options, String... keys) {
+    for (String key : keys) {
+      String val = options.get(key);
+      if (val != null && val.equals("true")) {
+        return false;
+      }
+    }
+    return true;
+  }
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
index 41d5dc7..7925855 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
@@ -27,6 +27,7 @@ import org.apache.accumulo.core.cli.ClientOnRequiredTable;
 import org.apache.accumulo.core.client.BatchScanner;
 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
@@ -46,16 +47,32 @@ public class Query {
   static class Opts extends ClientOnRequiredTable {
     @Parameter(description = " term { <term> ... }")
     List<String> terms = new ArrayList<String>();
+
+    @Parameter(names = {"--sample"}, description = "Do queries against sample, useful when sample is built using column qualifier")
+    private boolean useSample = false;
+
+    @Parameter(names = {"--sampleCutoff"},
+        description = "Use sample data to determine if a query might return a number of documents over the cutoff.  This check is per tablet.")
+    private Integer sampleCutoff = null;
   }
 
-  public static List<String> query(BatchScanner bs, List<String> terms) {
+  public static List<String> query(BatchScanner bs, List<String> terms, Integer cutoff) {
 
     Text columns[] = new Text[terms.size()];
     int i = 0;
     for (String term : terms) {
       columns[i++] = new Text(term);
     }
-    IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
+
+    IteratorSetting ii;
+
+    if (cutoff != null) {
+      ii = new IteratorSetting(20, "ii", CutoffIntersectingIterator.class);
+      CutoffIntersectingIterator.setCutoff(ii, cutoff);
+    } else {
+      ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
+    }
+
     IntersectingIterator.setColumnFamilies(ii, columns);
     bs.addScanIterator(ii);
     bs.setRanges(Collections.singleton(new Range()));
@@ -73,9 +90,15 @@ public class Query {
     Connector conn = opts.getConnector();
     BatchScanner bs = conn.createBatchScanner(opts.getTableName(), opts.auths, bsOpts.scanThreads);
     bs.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);
-
-    for (String entry : query(bs, opts.terms))
+    if (opts.useSample) {
+      SamplerConfiguration samplerConfig = conn.tableOperations().getSamplerConfiguration(opts.getTableName());
+      CutoffIntersectingIterator.validateSamplerConfig(samplerConfig);
+      bs.setSamplerConfiguration(samplerConfig);
+    }
+    for (String entry : query(bs, opts.terms, opts.sampleCutoff))
       System.out.println("  " + entry);
+
+    bs.close();
   }
 
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/base/src/main/java/org/apache/accumulo/server/util/VerifyTabletAssignments.java
----------------------------------------------------------------------
diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/VerifyTabletAssignments.java b/server/base/src/main/java/org/apache/accumulo/server/util/VerifyTabletAssignments.java
index 0d7ade8..d2d6664 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/util/VerifyTabletAssignments.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/util/VerifyTabletAssignments.java
@@ -189,7 +189,7 @@ public class VerifyTabletAssignments {
     List<IterInfo> emptyListIterInfo = Collections.emptyList();
     List<TColumn> emptyListColumn = Collections.emptyList();
     InitialMultiScan is = client.startMultiScan(tinfo, context.rpcCreds(), batch, emptyListColumn, emptyListIterInfo, emptyMapSMapSS,
-        Authorizations.EMPTY.getAuthorizationsBB(), false, 0L);
+        Authorizations.EMPTY.getAuthorizationsBB(), false, null, 0L);
     if (is.result.more) {
       MultiScanResult result = client.continueMultiScan(tinfo, is.scanID);
       checkFailures(entry.getKey(), failures, result);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/base/src/test/java/org/apache/accumulo/server/iterators/MetadataBulkLoadFilterTest.java
----------------------------------------------------------------------
diff --git a/server/base/src/test/java/org/apache/accumulo/server/iterators/MetadataBulkLoadFilterTest.java b/server/base/src/test/java/org/apache/accumulo/server/iterators/MetadataBulkLoadFilterTest.java
index 7e9543f..1b30530 100644
--- a/server/base/src/test/java/org/apache/accumulo/server/iterators/MetadataBulkLoadFilterTest.java
+++ b/server/base/src/test/java/org/apache/accumulo/server/iterators/MetadataBulkLoadFilterTest.java
@@ -21,18 +21,15 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.TreeMap;
 
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment;
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
-import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.SortedMapIterator;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
-import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.ColumnFQ;
 import org.apache.accumulo.fate.zookeeper.TransactionWatcher.Arbitrator;
 import org.apache.hadoop.io.Text;
@@ -104,20 +101,7 @@ public class MetadataBulkLoadFilterTest {
     put(tm1, "2<", TabletsSection.BulkFileColumnFamily.NAME, "/t2/fileA", "2");
 
     TestMetadataBulkLoadFilter iter = new TestMetadataBulkLoadFilter();
-    iter.init(new SortedMapIterator(tm1), new HashMap<String,String>(), new IteratorEnvironment() {
-
-      @Override
-      public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
-        return null;
-      }
-
-      @Override
-      public void registerSideChannel(SortedKeyValueIterator<Key,Value> iter) {}
-
-      @Override
-      public Authorizations getAuthorizations() {
-        return null;
-      }
+    iter.init(new SortedMapIterator(tm1), new HashMap<String,String>(), new BaseIteratorEnvironment() {
 
       @Override
       public boolean isFullMajorCompaction() {
@@ -128,11 +112,6 @@ public class MetadataBulkLoadFilterTest {
       public IteratorScope getIteratorScope() {
         return IteratorScope.majc;
       }
-
-      @Override
-      public AccumuloConfiguration getConfig() {
-        return null;
-      }
     });
 
     iter.seek(new Range(), new ArrayList<ByteSequence>(), false);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/base/src/test/java/org/apache/accumulo/server/replication/StatusCombinerTest.java
----------------------------------------------------------------------
diff --git a/server/base/src/test/java/org/apache/accumulo/server/replication/StatusCombinerTest.java b/server/base/src/test/java/org/apache/accumulo/server/replication/StatusCombinerTest.java
index f4d5a9b..26ad8de 100644
--- a/server/base/src/test/java/org/apache/accumulo/server/replication/StatusCombinerTest.java
+++ b/server/base/src/test/java/org/apache/accumulo/server/replication/StatusCombinerTest.java
@@ -24,16 +24,10 @@ import java.util.List;
 
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.IteratorSetting.Column;
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.iterators.Combiner;
 import org.apache.accumulo.core.iterators.DevNull;
-import org.apache.accumulo.core.iterators.IteratorEnvironment;
-import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
-import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.replication.ReplicationSchema.StatusSection;
-import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.server.replication.proto.Replication.Status;
 import org.junit.Assert;
 import org.junit.Before;
@@ -52,38 +46,7 @@ public class StatusCombinerTest {
     builder = Status.newBuilder();
     IteratorSetting cfg = new IteratorSetting(50, StatusCombiner.class);
     Combiner.setColumns(cfg, Collections.singletonList(new Column(StatusSection.NAME)));
-    combiner.init(new DevNull(), cfg.getOptions(), new IteratorEnvironment() {
-
-      @Override
-      public AccumuloConfiguration getConfig() {
-        return null;
-      }
-
-      @Override
-      public IteratorScope getIteratorScope() {
-        return null;
-      }
-
-      @Override
-      public boolean isFullMajorCompaction() {
-        return false;
-      }
-
-      @Override
-      public void registerSideChannel(SortedKeyValueIterator<Key,Value> arg0) {
-
-      }
-
-      @Override
-      public Authorizations getAuthorizations() {
-        return null;
-      }
-
-      @Override
-      public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String arg0) throws IOException {
-        return null;
-      }
-    });
+    combiner.init(new DevNull(), cfg.getOptions(), null);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/monitor/src/main/java/org/apache/accumulo/monitor/servlets/trace/NullScanner.java
----------------------------------------------------------------------
diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/servlets/trace/NullScanner.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/servlets/trace/NullScanner.java
index 750ad8e..2c46835 100644
--- a/server/monitor/src/main/java/org/apache/accumulo/monitor/servlets/trace/NullScanner.java
+++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/servlets/trace/NullScanner.java
@@ -23,6 +23,7 @@ import java.util.concurrent.TimeUnit;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.IteratorSetting.Column;
 import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
@@ -134,4 +135,14 @@ public class NullScanner implements Scanner {
     return 0;
   }
 
+  @Override
+  public void setSamplerConfiguration(SamplerConfiguration samplerConfig) {}
+
+  @Override
+  public SamplerConfiguration getSamplerConfiguration() {
+    return null;
+  }
+
+  @Override
+  public void clearSamplerConfiguration() {}
 }


[2/7] accumulo git commit: ACCUMULO-3913 Added per table sampling

Posted by kt...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
index 1c4676e..2227b25 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/FileManager.java
@@ -29,6 +29,7 @@ import java.util.Map.Entry;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
@@ -43,6 +44,7 @@ import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator;
 import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSource;
 import org.apache.accumulo.core.iterators.system.TimeSettingIterator;
 import org.apache.accumulo.core.metadata.schema.DataFileValue;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.server.AccumuloServerContext;
 import org.apache.accumulo.server.fs.FileRef;
 import org.apache.accumulo.server.fs.VolumeManager;
@@ -458,7 +460,6 @@ public class FileManager {
       this.iflag = flag;
       ((InterruptibleIterator) this.iter).setInterruptFlag(iflag);
     }
-
   }
 
   public class ScanFileManager {
@@ -502,7 +503,8 @@ public class FileManager {
       return newlyReservedReaders;
     }
 
-    public synchronized List<InterruptibleIterator> openFiles(Map<FileRef,DataFileValue> files, boolean detachable) throws IOException {
+    public synchronized List<InterruptibleIterator> openFiles(Map<FileRef,DataFileValue> files, boolean detachable, SamplerConfigurationImpl samplerConfig)
+        throws IOException {
 
       List<FileSKVIterator> newlyReservedReaders = openFileRefs(files.keySet());
 
@@ -511,13 +513,22 @@ public class FileManager {
       for (FileSKVIterator reader : newlyReservedReaders) {
         String filename = getReservedReadeFilename(reader);
         InterruptibleIterator iter;
+
+        FileSKVIterator source = reader;
+        if (samplerConfig != null) {
+          source = source.getSample(samplerConfig);
+          if (source == null) {
+            throw new SampleNotPresentException();
+          }
+        }
+
         if (detachable) {
-          FileDataSource fds = new FileDataSource(filename, reader);
+          FileDataSource fds = new FileDataSource(filename, source);
           dataSources.add(fds);
           SourceSwitchingIterator ssi = new SourceSwitchingIterator(fds);
           iter = new ProblemReportingIterator(context, tablet.getTableId().toString(), filename, continueOnFailure, ssi);
         } else {
-          iter = new ProblemReportingIterator(context, tablet.getTableId().toString(), filename, continueOnFailure, reader);
+          iter = new ProblemReportingIterator(context, tablet.getTableId().toString(), filename, continueOnFailure, source);
         }
         DataFileValue value = files.get(new FileRef(filename));
         if (value.isTimeSet()) {
@@ -539,7 +550,7 @@ public class FileManager {
         fds.unsetIterator();
     }
 
-    public synchronized void reattach() throws IOException {
+    public synchronized void reattach(SamplerConfigurationImpl samplerConfig) throws IOException {
       if (tabletReservedReaders.size() != 0)
         throw new IllegalStateException();
 
@@ -562,7 +573,14 @@ public class FileManager {
 
       for (FileDataSource fds : dataSources) {
         FileSKVIterator reader = map.get(fds.file).remove(0);
-        fds.setIterator(reader);
+        FileSKVIterator source = reader;
+        if (samplerConfig != null) {
+          source = source.getSample(samplerConfig);
+          if (source == null) {
+            throw new SampleNotPresentException();
+          }
+        }
+        fds.setIterator(source);
       }
     }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
index 2274eea..f5141ff 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/InMemoryMap.java
@@ -16,6 +16,8 @@
  */
 package org.apache.accumulo.tserver;
 
+import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -33,8 +35,11 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
 
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.conf.ConfigurationCopy;
 import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.conf.SiteConfiguration;
 import org.apache.accumulo.core.data.ByteSequence;
@@ -51,15 +56,20 @@ import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.SortedMapIterator;
 import org.apache.accumulo.core.iterators.WrappingIterator;
+import org.apache.accumulo.core.iterators.system.EmptyIterator;
 import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
 import org.apache.accumulo.core.iterators.system.LocalityGroupIterator;
 import org.apache.accumulo.core.iterators.system.LocalityGroupIterator.LocalityGroup;
 import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator;
 import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSource;
+import org.apache.accumulo.core.sample.Sampler;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
+import org.apache.accumulo.core.sample.impl.SamplerFactory;
 import org.apache.accumulo.core.util.CachedConfiguration;
 import org.apache.accumulo.core.util.LocalityGroupUtil;
 import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
 import org.apache.accumulo.core.util.LocalityGroupUtil.Partitioner;
+import org.apache.accumulo.core.util.Pair;
 import org.apache.accumulo.core.util.PreAllocatedArray;
 import org.apache.commons.lang.mutable.MutableLong;
 import org.apache.hadoop.conf.Configuration;
@@ -68,7 +78,8 @@ import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
 
 public class InMemoryMap {
   private SimpleMap map = null;
@@ -80,22 +91,58 @@ public class InMemoryMap {
 
   private Map<String,Set<ByteSequence>> lggroups;
 
-  public InMemoryMap(boolean useNativeMap, String memDumpDir) {
-    this(new HashMap<String,Set<ByteSequence>>(), useNativeMap, memDumpDir);
+  private static Pair<SamplerConfigurationImpl,Sampler> getSampler(AccumuloConfiguration config) {
+    try {
+      SamplerConfigurationImpl sampleConfig = SamplerConfigurationImpl.newSamplerConfig(config);
+      if (sampleConfig == null) {
+        return new Pair<>(null, null);
+      }
+
+      return new Pair<>(sampleConfig, SamplerFactory.newSampler(sampleConfig, config));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
   }
 
-  public InMemoryMap(Map<String,Set<ByteSequence>> lggroups, boolean useNativeMap, String memDumpDir) {
-    this.memDumpDir = memDumpDir;
-    this.lggroups = lggroups;
+  private AtomicReference<Pair<SamplerConfigurationImpl,Sampler>> samplerRef = new AtomicReference<>(null);
 
-    if (lggroups.size() == 0)
-      map = newMap(useNativeMap);
-    else
-      map = new LocalityGroupMap(lggroups, useNativeMap);
+  private AccumuloConfiguration config;
+
+  // Defer creating the sampler until the first write. This is done because an empty sample map configured with no sampler will not flush after a user changes
+  // the sample config.
+  private Sampler getOrCreateSampler() {
+    Pair<SamplerConfigurationImpl,Sampler> pair = samplerRef.get();
+    if (pair == null) {
+      pair = getSampler(config);
+      if (!samplerRef.compareAndSet(null, pair)) {
+        pair = samplerRef.get();
+      }
+    }
+
+    return pair.getSecond();
   }
 
   public InMemoryMap(AccumuloConfiguration config) throws LocalityGroupConfigurationError {
-    this(LocalityGroupUtil.getLocalityGroups(config), config.getBoolean(Property.TSERV_NATIVEMAP_ENABLED), config.get(Property.TSERV_MEMDUMP_DIR));
+
+    boolean useNativeMap = config.getBoolean(Property.TSERV_NATIVEMAP_ENABLED);
+
+    this.memDumpDir = config.get(Property.TSERV_MEMDUMP_DIR);
+    this.lggroups = LocalityGroupUtil.getLocalityGroups(config);
+
+    this.config = config;
+
+    SimpleMap allMap;
+    SimpleMap sampleMap;
+
+    if (lggroups.size() == 0) {
+      allMap = newMap(useNativeMap);
+      sampleMap = newMap(useNativeMap);
+    } else {
+      allMap = new LocalityGroupMap(lggroups, useNativeMap);
+      sampleMap = new LocalityGroupMap(lggroups, useNativeMap);
+    }
+
+    map = new SampleMap(allMap, sampleMap);
   }
 
   private static SimpleMap newMap(boolean useNativeMap) {
@@ -117,7 +164,7 @@ public class InMemoryMap {
 
     int size();
 
-    InterruptibleIterator skvIterator();
+    InterruptibleIterator skvIterator(SamplerConfigurationImpl samplerConfig);
 
     void delete();
 
@@ -126,6 +173,95 @@ public class InMemoryMap {
     void mutate(List<Mutation> mutations, int kvCount);
   }
 
+  private class SampleMap implements SimpleMap {
+
+    private SimpleMap map;
+    private SimpleMap sample;
+
+    public SampleMap(SimpleMap map, SimpleMap sampleMap) {
+      this.map = map;
+      this.sample = sampleMap;
+    }
+
+    @Override
+    public Value get(Key key) {
+      return map.get(key);
+    }
+
+    @Override
+    public Iterator<Entry<Key,Value>> iterator(Key startKey) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int size() {
+      return map.size();
+    }
+
+    @Override
+    public InterruptibleIterator skvIterator(SamplerConfigurationImpl samplerConfig) {
+      if (samplerConfig == null)
+        return map.skvIterator(null);
+      else {
+        Pair<SamplerConfigurationImpl,Sampler> samplerAndConf = samplerRef.get();
+        if (samplerAndConf == null) {
+          return EmptyIterator.EMPTY_ITERATOR;
+        } else if (samplerAndConf.getFirst() != null && samplerAndConf.getFirst().equals(samplerConfig)) {
+          return sample.skvIterator(null);
+        } else {
+          throw new SampleNotPresentException();
+        }
+      }
+    }
+
+    @Override
+    public void delete() {
+      map.delete();
+      sample.delete();
+    }
+
+    @Override
+    public long getMemoryUsed() {
+      return map.getMemoryUsed() + sample.getMemoryUsed();
+    }
+
+    @Override
+    public void mutate(List<Mutation> mutations, int kvCount) {
+      map.mutate(mutations, kvCount);
+
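+      // If a sampler is configured, route each column update whose key the sampler accepts into the parallel sample map. The sample mutation lists are
+      // created lazily so mutations with no sampled updates incur no extra allocation.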
+      Sampler sampler = getOrCreateSampler();
+      if (sampler != null) {
+        List<Mutation> sampleMutations = null;
+
+        for (Mutation m : mutations) {
+          List<ColumnUpdate> colUpdates = m.getUpdates();
+          List<ColumnUpdate> sampleColUpdates = null;
+          for (ColumnUpdate cvp : colUpdates) {
+            Key k = new Key(m.getRow(), cvp.getColumnFamily(), cvp.getColumnQualifier(), cvp.getColumnVisibility(), cvp.getTimestamp(), cvp.isDeleted(), false);
+            if (sampler.accept(k)) {
+              if (sampleColUpdates == null) {
+                sampleColUpdates = new ArrayList<>();
+              }
+              sampleColUpdates.add(cvp);
+            }
+          }
+
+          if (sampleColUpdates != null) {
+            if (sampleMutations == null) {
+              sampleMutations = new ArrayList<>();
+            }
+
+            sampleMutations.add(new LocalityGroupUtil.PartitionedMutation(m.getRow(), sampleColUpdates));
+          }
+        }
+
+        if (sampleMutations != null) {
+          sample.mutate(sampleMutations, kvCount);
+        }
+      }
+    }
+  }
+
   private static class LocalityGroupMap implements SimpleMap {
 
     private PreAllocatedArray<Map<ByteSequence,MutableLong>> groupFams;
@@ -181,13 +317,16 @@ public class InMemoryMap {
     }
 
     @Override
-    public InterruptibleIterator skvIterator() {
+    public InterruptibleIterator skvIterator(SamplerConfigurationImpl samplerConfig) {
+      if (samplerConfig != null)
+        throw new SampleNotPresentException();
+
       LocalityGroup groups[] = new LocalityGroup[maps.length];
       for (int i = 0; i < groups.length; i++) {
         if (i < groupFams.length)
-          groups[i] = new LocalityGroup(maps[i].skvIterator(), groupFams.get(i), false);
+          groups[i] = new LocalityGroup(maps[i].skvIterator(null), groupFams.get(i), false);
         else
-          groups[i] = new LocalityGroup(maps[i].skvIterator(), null, true);
+          groups[i] = new LocalityGroup(maps[i].skvIterator(null), null, true);
       }
 
       return new LocalityGroupIterator(groups, nonDefaultColumnFamilies);
@@ -264,7 +403,9 @@ public class InMemoryMap {
     }
 
     @Override
-    public synchronized InterruptibleIterator skvIterator() {
+    public InterruptibleIterator skvIterator(SamplerConfigurationImpl samplerConfig) {
+      if (samplerConfig != null)
+        throw new SampleNotPresentException();
       if (map == null)
         throw new IllegalStateException();
 
@@ -327,7 +468,9 @@ public class InMemoryMap {
     }
 
     @Override
-    public InterruptibleIterator skvIterator() {
+    public InterruptibleIterator skvIterator(SamplerConfigurationImpl samplerConfig) {
+      if (samplerConfig != null)
+        throw new SampleNotPresentException();
       return (InterruptibleIterator) nativeMap.skvIterator();
     }
 
@@ -410,16 +553,30 @@ public class InMemoryMap {
     private MemoryDataSource parent;
     private IteratorEnvironment env;
     private AtomicBoolean iflag;
+    private SamplerConfigurationImpl iteratorSamplerConfig;
+
+    private SamplerConfigurationImpl getSamplerConfig() {
+      if (env != null) {
+        if (env.isSamplingEnabled()) {
+          return new SamplerConfigurationImpl(env.getSamplerConfiguration());
+        } else {
+          return null;
+        }
+      } else {
+        return iteratorSamplerConfig;
+      }
+    }
 
-    MemoryDataSource() {
-      this(null, false, null, null);
+    MemoryDataSource(SamplerConfigurationImpl samplerConfig) {
+      this(null, false, null, null, samplerConfig);
     }
 
-    public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env, AtomicBoolean iflag) {
+    public MemoryDataSource(MemoryDataSource parent, boolean switched, IteratorEnvironment env, AtomicBoolean iflag, SamplerConfigurationImpl samplerConfig) {
       this.parent = parent;
       this.switched = switched;
       this.env = env;
       this.iflag = iflag;
+      this.iteratorSamplerConfig = samplerConfig;
     }
 
     @Override
@@ -457,6 +614,10 @@ public class InMemoryMap {
         reader = new RFileOperations().openReader(memDumpFile, true, fs, conf, SiteConfiguration.getInstance());
         if (iflag != null)
           reader.setInterruptFlag(iflag);
+
+        if (getSamplerConfig() != null) {
+          reader = reader.getSample(getSamplerConfig());
+        }
       }
 
       return reader;
@@ -466,7 +627,7 @@ public class InMemoryMap {
     public SortedKeyValueIterator<Key,Value> iterator() throws IOException {
       if (iter == null)
         if (!switched) {
-          iter = map.skvIterator();
+          iter = map.skvIterator(getSamplerConfig());
           if (iflag != null)
             iter.setInterruptFlag(iflag);
         } else {
@@ -485,7 +646,7 @@ public class InMemoryMap {
 
     @Override
     public DataSource getDeepCopyDataSource(IteratorEnvironment env) {
-      return new MemoryDataSource(parent == null ? this : parent, switched, env, iflag);
+      return new MemoryDataSource(parent == null ? this : parent, switched, env, iflag, iteratorSamplerConfig);
     }
 
     @Override
@@ -562,7 +723,7 @@ public class InMemoryMap {
 
   }
 
-  public synchronized MemoryIterator skvIterator() {
+  public synchronized MemoryIterator skvIterator(SamplerConfigurationImpl iteratorSamplerConfig) {
     if (map == null)
       throw new NullPointerException();
 
@@ -570,8 +731,9 @@ public class InMemoryMap {
       throw new IllegalStateException("Can not obtain iterator after map deleted");
 
     int mc = kvCount.get();
-    MemoryDataSource mds = new MemoryDataSource();
-    SourceSwitchingIterator ssi = new SourceSwitchingIterator(new MemoryDataSource());
+    MemoryDataSource mds = new MemoryDataSource(iteratorSamplerConfig);
+    // TODO seems like a bug that two MemoryDataSources are created... may need to fix in older branches
+    SourceSwitchingIterator ssi = new SourceSwitchingIterator(mds);
     MemoryIterator mi = new MemoryIterator(new PartialMutationSkippingIterator(ssi, mc));
     mi.setSSI(ssi);
     mi.setMDS(mds);
@@ -584,7 +746,7 @@ public class InMemoryMap {
     if (nextKVCount.get() - 1 != kvCount.get())
       throw new IllegalStateException("Memory map in unexpected state : nextKVCount = " + nextKVCount.get() + " kvCount = " + kvCount.get());
 
-    return map.skvIterator();
+    return map.skvIterator(null);
   }
 
   private boolean deleted = false;
@@ -615,9 +777,15 @@ public class InMemoryMap {
         Configuration newConf = new Configuration(conf);
         newConf.setInt("io.seqfile.compress.blocksize", 100000);
 
-        FileSKVWriter out = new RFileOperations().openWriter(tmpFile, fs, newConf, SiteConfiguration.getInstance());
+        AccumuloConfiguration siteConf = SiteConfiguration.getInstance();
 
-        InterruptibleIterator iter = map.skvIterator();
+        if (getOrCreateSampler() != null) {
+          siteConf = createSampleConfig(siteConf);
+        }
+
+        FileSKVWriter out = new RFileOperations().openWriter(tmpFile, fs, newConf, siteConf);
+
+        InterruptibleIterator iter = map.skvIterator(null);
 
         HashSet<ByteSequence> allfams = new HashSet<ByteSequence>();
 
@@ -668,14 +836,28 @@ public class InMemoryMap {
     tmpMap.delete();
   }
 
+  private AccumuloConfiguration createSampleConfig(AccumuloConfiguration siteConf) {
+    ConfigurationCopy confCopy = new ConfigurationCopy(Iterables.filter(siteConf, new Predicate<Entry<String,String>>() {
+      @Override
+      public boolean apply(Entry<String,String> input) {
+        return !input.getKey().startsWith(Property.TABLE_SAMPLER.getKey());
+      }
+    }));
+
+    for (Entry<String,String> entry : samplerRef.get().getFirst().toTablePropertiesMap().entrySet()) {
+      confCopy.set(entry.getKey(), entry.getValue());
+    }
+
+    siteConf = confCopy;
+    return siteConf;
+  }
+
   private void dumpLocalityGroup(FileSKVWriter out, InterruptibleIterator iter) throws IOException {
     while (iter.hasTop() && activeIters.size() > 0) {
       // RFile does not support MemKey, so we move the kv count into the value only for the RFile.
       // There is no need to change the MemKey to a normal key because the kvCount info gets lost when it is written
-      Value newValue = new MemValue(iter.getTopValue(), ((MemKey) iter.getTopKey()).kvCount);
-      out.append(iter.getTopKey(), newValue);
+      out.append(iter.getTopKey(), MemValue.encode(iter.getTopValue(), ((MemKey) iter.getTopKey()).kvCount));
       iter.next();
-
     }
   }
 }
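
The createSampleConfig method above overlays the in-memory map's sampler settings onto a copy of the site configuration after first stripping any existing table.sampler properties, so the dumped RFile is written with exactly the sampler that produced the in-memory sample. A minimal standalone sketch of that filter-then-overlay pattern, using plain java.util maps instead of Accumulo's AccumuloConfiguration/ConfigurationCopy (the property keys and class name below are illustrative, not Accumulo API):

    import java.util.HashMap;
    import java.util.Map;

    public class SamplerConfigOverlay {
      // Stands in for Property.TABLE_SAMPLER.getKey(); the literal is an assumption for illustration.
      static final String SAMPLER_PREFIX = "table.sampler";

      /** Copy the base config, drop existing sampler keys, then apply the sampler's own properties. */
      static Map<String,String> overlay(Map<String,String> base, Map<String,String> samplerProps) {
        Map<String,String> copy = new HashMap<>();
        for (Map.Entry<String,String> e : base.entrySet()) {
          if (!e.getKey().startsWith(SAMPLER_PREFIX)) {
            copy.put(e.getKey(), e.getValue());
          }
        }
        copy.putAll(samplerProps); // the new sampler config wins over anything filtered above
        return copy;
      }

      public static void main(String[] args) {
        Map<String,String> base = new HashMap<>();
        base.put("table.sampler", "OldSampler");
        base.put("table.file.compress.type", "gz");
        Map<String,String> sampler = new HashMap<>();
        sampler.put("table.sampler", "org.apache.accumulo.core.sample.RowSampler");
        sampler.put("table.sampler.opt.modulus", "7");
        // e.g. {table.sampler=..., table.sampler.opt.modulus=7, table.file.compress.type=gz}
        // (HashMap iteration order varies)
        System.out.println(overlay(base, sampler));
      }
    }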

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/MemKeyConversionIterator.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/MemKeyConversionIterator.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/MemKeyConversionIterator.java
index 00c8be9..71a4cbd 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/MemKeyConversionIterator.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/MemKeyConversionIterator.java
@@ -61,10 +61,10 @@ class MemKeyConversionIterator extends WrappingIterator implements Interruptible
       currVal = v;
       return;
     }
-    currVal = new Value(v);
-    int mc = MemValue.splitKVCount(currVal);
-    currKey = new MemKey(k, mc);
 
+    MemValue mv = MemValue.decode(v);
+    currVal = mv.value;
+    currKey = new MemKey(k, mv.kvCount);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/MemValue.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/MemValue.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/MemValue.java
index bc44459..af6f2f1 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/MemValue.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/MemValue.java
@@ -16,69 +16,38 @@
  */
 package org.apache.accumulo.tserver;
 
-import java.io.DataOutput;
-import java.io.IOException;
-
 import org.apache.accumulo.core.data.Value;
 
 /**
  *
  */
-public class MemValue extends Value {
-  int kvCount;
-  boolean merged = false;
+public class MemValue {
 
-  public MemValue() {
-    super();
-    this.kvCount = Integer.MAX_VALUE;
-  }
+  Value value;
+  int kvCount;
 
   public MemValue(Value value, int kv) {
-    super(value);
+    this.value = value;
     this.kvCount = kv;
   }
 
-  // Override
-  @Override
-  public void write(final DataOutput out) throws IOException {
-    if (!merged) {
-      byte[] combinedBytes = new byte[getSize() + 4];
-      System.arraycopy(value, 0, combinedBytes, 4, getSize());
-      combinedBytes[0] = (byte) (kvCount >>> 24);
-      combinedBytes[1] = (byte) (kvCount >>> 16);
-      combinedBytes[2] = (byte) (kvCount >>> 8);
-      combinedBytes[3] = (byte) (kvCount);
-      value = combinedBytes;
-      merged = true;
-    }
-    super.write(out);
-  }
-
-  @Override
-  public void set(final byte[] b) {
-    super.set(b);
-    merged = false;
-  }
-
-  @Override
-  public void copy(byte[] b) {
-    super.copy(b);
-    merged = false;
+  public static Value encode(Value value, int kv) {
+    byte[] combinedBytes = new byte[value.getSize() + 4];
+    System.arraycopy(value.get(), 0, combinedBytes, 4, value.getSize());
+    combinedBytes[0] = (byte) (kv >>> 24);
+    combinedBytes[1] = (byte) (kv >>> 16);
+    combinedBytes[2] = (byte) (kv >>> 8);
+    combinedBytes[3] = (byte) (kv);
+    return new Value(combinedBytes);
   }
 
-  /**
-   * Takes a Value and will take out the embedded kvCount, and then return that value while replacing the Value with the original unembedded version
-   *
-   * @return The kvCount embedded in v.
-   */
-  public static int splitKVCount(Value v) {
-    if (v instanceof MemValue)
-      return ((MemValue) v).kvCount;
-
+  public static MemValue decode(Value v) {
     byte[] originalBytes = new byte[v.getSize() - 4];
     byte[] combined = v.get();
     System.arraycopy(combined, 4, originalBytes, 0, originalBytes.length);
     v.set(originalBytes);
-    return (combined[0] << 24) + ((combined[1] & 0xFF) << 16) + ((combined[2] & 0xFF) << 8) + (combined[3] & 0xFF);
+    int kv = (combined[0] << 24) + ((combined[1] & 0xFF) << 16) + ((combined[2] & 0xFF) << 8) + (combined[3] & 0xFF);
+
+    return new MemValue(new Value(originalBytes), kv);
   }
 }
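
The rewritten MemValue replaces the old stateful subclass-of-Value approach with a stateless framing: encode prepends the kvCount as a 4-byte big-endian prefix ahead of the original value bytes, and decode splits it back out. A self-contained sketch of the same framing on raw byte arrays (the class and method names are illustrative, not Accumulo API):

    import java.util.Arrays;

    public class KvCountFraming {
      /** Prepend kvCount as 4 big-endian bytes, mirroring MemValue.encode above. */
      static byte[] encode(byte[] value, int kvCount) {
        byte[] combined = new byte[value.length + 4];
        combined[0] = (byte) (kvCount >>> 24);
        combined[1] = (byte) (kvCount >>> 16);
        combined[2] = (byte) (kvCount >>> 8);
        combined[3] = (byte) kvCount;
        System.arraycopy(value, 0, combined, 4, value.length);
        return combined;
      }

      /** Recover the count; index 0 needs no 0xFF mask because it carries the sign bits. */
      static int decodeCount(byte[] combined) {
        return (combined[0] << 24) + ((combined[1] & 0xFF) << 16) + ((combined[2] & 0xFF) << 8) + (combined[3] & 0xFF);
      }

      /** Recover the original value bytes that follow the 4-byte prefix. */
      static byte[] decodeValue(byte[] combined) {
        return Arrays.copyOfRange(combined, 4, combined.length);
      }

      public static void main(String[] args) {
        byte[] framed = encode("bar1".getBytes(), 42);
        System.out.println(decodeCount(framed));             // 42
        System.out.println(new String(decodeValue(framed))); // bar1
      }
    }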

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/NativeMap.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/NativeMap.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/NativeMap.java
index cf01dd3..3cb4d40 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/NativeMap.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/NativeMap.java
@@ -34,6 +34,7 @@ import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.ColumnUpdate;
 import org.apache.accumulo.core.data.Key;
@@ -749,6 +750,9 @@ public class NativeMap implements Iterable<Map.Entry<Key,Value>> {
 
     @Override
     public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
+      if (env != null && env.isSamplingEnabled()) {
+        throw new SampleNotPresentException();
+      }
       return new NMSKVIter(map, interruptFlag);
     }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletIteratorEnvironment.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletIteratorEnvironment.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletIteratorEnvironment.java
index 6c5b63d..73adec3 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletIteratorEnvironment.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletIteratorEnvironment.java
@@ -21,6 +21,8 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Map;
 
+import org.apache.accumulo.core.client.SampleNotPresentException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
@@ -29,6 +31,7 @@ import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.system.MultiIterator;
 import org.apache.accumulo.core.metadata.schema.DataFileValue;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.server.fs.FileRef;
 import org.apache.accumulo.tserver.FileManager.ScanFileManager;
@@ -40,10 +43,12 @@ public class TabletIteratorEnvironment implements IteratorEnvironment {
   private final IteratorScope scope;
   private final boolean fullMajorCompaction;
   private final AccumuloConfiguration config;
-  private final ArrayList<SortedKeyValueIterator<Key,Value>> topLevelIterators = new ArrayList<SortedKeyValueIterator<Key,Value>>();
+  private final ArrayList<SortedKeyValueIterator<Key,Value>> topLevelIterators;
   private Map<FileRef,DataFileValue> files;
 
   private final Authorizations authorizations; // these will only be supplied during scan scope
+  private SamplerConfiguration samplerConfig;
+  private boolean enableSampleForDeepCopy;
 
   public TabletIteratorEnvironment(IteratorScope scope, AccumuloConfiguration config) {
     if (scope == IteratorScope.majc)
@@ -54,10 +59,11 @@ public class TabletIteratorEnvironment implements IteratorEnvironment {
     this.config = config;
     this.fullMajorCompaction = false;
     this.authorizations = Authorizations.EMPTY;
+    this.topLevelIterators = new ArrayList<>();
   }
 
-  public TabletIteratorEnvironment(IteratorScope scope, AccumuloConfiguration config, ScanFileManager trm, Map<FileRef,DataFileValue> files,
-      Authorizations authorizations) {
+  private TabletIteratorEnvironment(IteratorScope scope, AccumuloConfiguration config, ScanFileManager trm, Map<FileRef,DataFileValue> files,
+      Authorizations authorizations, SamplerConfigurationImpl samplerConfig, ArrayList<SortedKeyValueIterator<Key,Value>> topLevelIterators) {
     if (scope == IteratorScope.majc)
       throw new IllegalArgumentException("must set if compaction is full");
 
@@ -67,6 +73,19 @@ public class TabletIteratorEnvironment implements IteratorEnvironment {
     this.fullMajorCompaction = false;
     this.files = files;
     this.authorizations = authorizations;
+    if (samplerConfig != null) {
+      enableSampleForDeepCopy = true;
+      this.samplerConfig = samplerConfig.toSamplerConfiguration();
+    } else {
+      enableSampleForDeepCopy = false;
+    }
+
+    this.topLevelIterators = topLevelIterators;
+  }
+
+  public TabletIteratorEnvironment(IteratorScope scope, AccumuloConfiguration config, ScanFileManager trm, Map<FileRef,DataFileValue> files,
+      Authorizations authorizations, SamplerConfigurationImpl samplerConfig) {
+    this(scope, config, trm, files, authorizations, samplerConfig, new ArrayList<SortedKeyValueIterator<Key,Value>>());
   }
 
   public TabletIteratorEnvironment(IteratorScope scope, boolean fullMajC, AccumuloConfiguration config) {
@@ -78,6 +97,7 @@ public class TabletIteratorEnvironment implements IteratorEnvironment {
     this.config = config;
     this.fullMajorCompaction = fullMajC;
     this.authorizations = Authorizations.EMPTY;
+    this.topLevelIterators = new ArrayList<SortedKeyValueIterator<Key,Value>>();
   }
 
   @Override
@@ -100,7 +120,7 @@ public class TabletIteratorEnvironment implements IteratorEnvironment {
   @Override
   public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
     FileRef ref = new FileRef(mapFileName, new Path(mapFileName));
-    return trm.openFiles(Collections.singletonMap(ref, files.get(ref)), false).get(0);
+    return trm.openFiles(Collections.singletonMap(ref, files.get(ref)), false, null).get(0);
   }
 
   @Override
@@ -122,4 +142,37 @@ public class TabletIteratorEnvironment implements IteratorEnvironment {
     allIters.add(iter);
     return new MultiIterator(allIters, false);
   }
+
+  @Override
+  public boolean isSamplingEnabled() {
+    return enableSampleForDeepCopy;
+  }
+
+  @Override
+  public SamplerConfiguration getSamplerConfiguration() {
+    if (samplerConfig == null) {
+      // only create this once so that it stays the same, even if config changes
+      SamplerConfigurationImpl sci = SamplerConfigurationImpl.newSamplerConfig(config);
+      if (sci == null) {
+        return null;
+      }
+      samplerConfig = sci.toSamplerConfiguration();
+    }
+    return samplerConfig;
+  }
+
+  @Override
+  public IteratorEnvironment cloneWithSamplingEnabled() {
+    if (!scope.equals(IteratorScope.scan)) {
+      throw new UnsupportedOperationException();
+    }
+
+    SamplerConfigurationImpl sci = SamplerConfigurationImpl.newSamplerConfig(config);
+    if (sci == null) {
+      throw new SampleNotPresentException();
+    }
+
+    TabletIteratorEnvironment te = new TabletIteratorEnvironment(scope, config, trm, files, authorizations, sci, topLevelIterators);
+    return te;
+  }
 }
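
Two details of TabletIteratorEnvironment above are worth calling out: getSamplerConfiguration derives the sampler settings from table config at most once and caches the result, so iterators see a stable view even if the table configuration changes mid-scan, and cloneWithSamplingEnabled fails fast with SampleNotPresentException when no sampler is configured (NativeMap's deepCopy takes the same fail-fast stance). A hedged sketch of that cache-once/fail-fast shape, with a local stand-in exception rather than the Accumulo type:

    import java.util.function.Supplier;

    public class SamplingEnv {
      static class SampleNotPresentException extends RuntimeException {}

      private final Supplier<String> configLookup; // stands in for reading the table config
      private String cachedSamplerConfig;          // created at most once, then reused

      SamplingEnv(Supplier<String> configLookup) {
        this.configLookup = configLookup;
      }

      synchronized String getSamplerConfiguration() {
        if (cachedSamplerConfig == null) {
          // only a non-null result is cached; like the original, a null lookup is retried
          cachedSamplerConfig = configLookup.get();
        }
        return cachedSamplerConfig;
      }

      SamplingEnv cloneWithSamplingEnabled() {
        if (getSamplerConfiguration() == null) {
          throw new SampleNotPresentException(); // no sample can exist without a configured sampler
        }
        return this;
      }
    }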

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
index de89b50..d35e6af 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
@@ -61,6 +61,7 @@ import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
 import org.apache.accumulo.core.client.Durability;
 import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.impl.CompressedIterators;
 import org.apache.accumulo.core.client.impl.CompressedIterators.IterConfig;
 import org.apache.accumulo.core.client.impl.DurabilityImpl;
@@ -114,6 +115,7 @@ import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 import org.apache.accumulo.core.replication.ReplicationConstants;
 import org.apache.accumulo.core.replication.thrift.ReplicationServicer;
 import org.apache.accumulo.core.rpc.ThriftUtil;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.security.thrift.TCredentials;
 import org.apache.accumulo.core.tabletserver.log.LogEntry;
@@ -123,6 +125,8 @@ import org.apache.accumulo.core.tabletserver.thrift.ConstraintViolationException
 import org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException;
 import org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException;
 import org.apache.accumulo.core.tabletserver.thrift.TDurability;
+import org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException;
+import org.apache.accumulo.core.tabletserver.thrift.TSamplerConfiguration;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Iface;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Processor;
@@ -447,8 +451,8 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
     @Override
     public InitialScan startScan(TInfo tinfo, TCredentials credentials, TKeyExtent textent, TRange range, List<TColumn> columns, int batchSize,
         List<IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, boolean isolated,
-        long readaheadThreshold, long batchTimeOut) throws NotServingTabletException, ThriftSecurityException,
-        org.apache.accumulo.core.tabletserver.thrift.TooManyFilesException {
+        long readaheadThreshold, TSamplerConfiguration tSamplerConfig, long batchTimeOut) throws NotServingTabletException, ThriftSecurityException,
+        org.apache.accumulo.core.tabletserver.thrift.TooManyFilesException, TSampleNotPresentException {
 
       String tableId = new String(textent.getTable(), UTF_8);
       if (!security.canScan(credentials, tableId, Tables.getNamespaceId(getInstance(), tableId), range, columns, ssiList, ssio, authorizations))
@@ -480,10 +484,11 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
       for (TColumn tcolumn : columns) {
         columnSet.add(new Column(tcolumn));
       }
+
       final ScanSession scanSession = new ScanSession(credentials, extent, columnSet, ssiList, ssio, new Authorizations(authorizations), readaheadThreshold,
           batchTimeOut);
       scanSession.scanner = tablet.createScanner(new Range(range), batchSize, scanSession.columnSet, scanSession.auths, ssiList, ssio, isolated,
-          scanSession.interruptFlag, scanSession.batchTimeOut);
+          scanSession.interruptFlag, SamplerConfigurationImpl.fromThrift(tSamplerConfig), scanSession.batchTimeOut);
 
       long sid = sessionManager.createSession(scanSession, true);
 
@@ -502,7 +507,7 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
 
     @Override
     public ScanResult continueScan(TInfo tinfo, long scanID) throws NoSuchScanIDException, NotServingTabletException,
-        org.apache.accumulo.core.tabletserver.thrift.TooManyFilesException {
+        org.apache.accumulo.core.tabletserver.thrift.TooManyFilesException, TSampleNotPresentException {
       ScanSession scanSession = (ScanSession) sessionManager.reserveSession(scanID);
       if (scanSession == null) {
         throw new NoSuchScanIDException();
@@ -516,7 +521,7 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
     }
 
     private ScanResult continueScan(TInfo tinfo, long scanID, ScanSession scanSession) throws NoSuchScanIDException, NotServingTabletException,
-        org.apache.accumulo.core.tabletserver.thrift.TooManyFilesException {
+        org.apache.accumulo.core.tabletserver.thrift.TooManyFilesException, TSampleNotPresentException {
 
       if (scanSession.nextBatchTask == null) {
         scanSession.nextBatchTask = new NextBatchTask(TabletServer.this, scanID, scanSession.interruptFlag);
@@ -533,6 +538,8 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
           throw (NotServingTabletException) e.getCause();
         else if (e.getCause() instanceof TooManyFilesException)
           throw new org.apache.accumulo.core.tabletserver.thrift.TooManyFilesException(scanSession.extent.toThrift());
+        else if (e.getCause() instanceof SampleNotPresentException)
+          throw new TSampleNotPresentException(scanSession.extent.toThrift());
         else if (e.getCause() instanceof IOException) {
           sleepUninterruptibly(MAX_TIME_TO_WAIT_FOR_SCAN_RESULT_MILLIS, TimeUnit.MILLISECONDS);
           List<KVEntry> empty = Collections.emptyList();
@@ -595,8 +602,8 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
 
     @Override
     public InitialMultiScan startMultiScan(TInfo tinfo, TCredentials credentials, Map<TKeyExtent,List<TRange>> tbatch, List<TColumn> tcolumns,
-        List<IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites, long batchTimeOut)
-        throws ThriftSecurityException {
+        List<IterInfo> ssiList, Map<String,Map<String,String>> ssio, List<ByteBuffer> authorizations, boolean waitForWrites,
+        TSamplerConfiguration tSamplerConfig, long batchTimeOut) throws ThriftSecurityException, TSampleNotPresentException {
       // find all of the tables that need to be scanned
       final HashSet<String> tables = new HashSet<String>();
       for (TKeyExtent keyExtent : tbatch.keySet()) {
@@ -627,7 +634,8 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
       if (waitForWrites)
         writeTracker.waitForWrites(TabletType.type(batch.keySet()));
 
-      final MultiScanSession mss = new MultiScanSession(credentials, threadPoolExtent, batch, ssiList, ssio, new Authorizations(authorizations), batchTimeOut);
+      final MultiScanSession mss = new MultiScanSession(credentials, threadPoolExtent, batch, ssiList, ssio, new Authorizations(authorizations),
+          SamplerConfigurationImpl.fromThrift(tSamplerConfig), batchTimeOut);
 
       mss.numTablets = batch.size();
       for (List<Range> ranges : batch.values()) {
@@ -653,7 +661,7 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
     }
 
     @Override
-    public MultiScanResult continueMultiScan(TInfo tinfo, long scanID) throws NoSuchScanIDException {
+    public MultiScanResult continueMultiScan(TInfo tinfo, long scanID) throws NoSuchScanIDException, TSampleNotPresentException {
 
       MultiScanSession session = (MultiScanSession) sessionManager.reserveSession(scanID);
 
@@ -668,7 +676,7 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
       }
     }
 
-    private MultiScanResult continueMultiScan(TInfo tinfo, long scanID, MultiScanSession session) throws NoSuchScanIDException {
+    private MultiScanResult continueMultiScan(TInfo tinfo, long scanID, MultiScanSession session) throws NoSuchScanIDException, TSampleNotPresentException {
 
       if (session.lookupTask == null) {
         session.lookupTask = new LookupTask(TabletServer.this, scanID);
@@ -679,6 +687,14 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
         MultiScanResult scanResult = session.lookupTask.get(MAX_TIME_TO_WAIT_FOR_SCAN_RESULT_MILLIS, TimeUnit.MILLISECONDS);
         session.lookupTask = null;
         return scanResult;
+      } catch (ExecutionException e) {
+        sessionManager.removeSession(scanID);
+        if (e.getCause() instanceof SampleNotPresentException) {
+          throw new TSampleNotPresentException();
+        } else {
+          log.warn("Failed to get multiscan result", e);
+          throw new RuntimeException(e);
+        }
       } catch (TimeoutException e1) {
         long timeout = TabletServer.this.getConfiguration().getTimeInMillis(Property.TSERV_CLIENT_TIMEOUT);
         sessionManager.removeIfNotAccessed(scanID, timeout);
@@ -1116,7 +1132,7 @@ public class TabletServer extends AccumuloServerContext implements Runnable {
 
         IterConfig ic = compressedIters.decompress(tc.iterators);
 
-        Scanner scanner = tablet.createScanner(range, 1, EMPTY_COLUMNS, cs.auths, ic.ssiList, ic.ssio, false, cs.interruptFlag, 0);
+        Scanner scanner = tablet.createScanner(range, 1, EMPTY_COLUMNS, cs.auths, ic.ssiList, ic.ssio, false, cs.interruptFlag, null, 0);
 
         try {
           ScanBatch batch = scanner.read();

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/strategies/ConfigurableCompactionStrategy.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/strategies/ConfigurableCompactionStrategy.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/strategies/ConfigurableCompactionStrategy.java
index b97b88b..04915ef 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/strategies/ConfigurableCompactionStrategy.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/strategies/ConfigurableCompactionStrategy.java
@@ -26,7 +26,10 @@ import java.util.Set;
 import java.util.regex.Pattern;
 
 import org.apache.accumulo.core.compaction.CompactionSettings;
+import org.apache.accumulo.core.conf.ConfigurationCopy;
+import org.apache.accumulo.core.file.FileSKVIterator;
 import org.apache.accumulo.core.metadata.schema.DataFileValue;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.server.fs.FileRef;
 import org.apache.accumulo.tserver.compaction.CompactionPlan;
 import org.apache.accumulo.tserver.compaction.CompactionStrategy;
@@ -40,6 +43,22 @@ public class ConfigurableCompactionStrategy extends CompactionStrategy {
     boolean shouldCompact(Entry<FileRef,DataFileValue> file, MajorCompactionRequest request);
   }
 
+  private static class NoSampleTest implements Test {
+
+    @Override
+    public boolean shouldCompact(Entry<FileRef,DataFileValue> file, MajorCompactionRequest request) {
+      try (FileSKVIterator reader = request.openReader(file.getKey())) {
+        SamplerConfigurationImpl sc = SamplerConfigurationImpl.newSamplerConfig(new ConfigurationCopy(request.getTableProperties()));
+        if (sc == null) {
+          return false;
+        }
+        return reader.getSample(sc) == null;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+
   private static abstract class FileSizeTest implements Test {
     private final long esize;
 
@@ -83,6 +102,9 @@ public class ConfigurableCompactionStrategy extends CompactionStrategy {
     for (Entry<String,String> entry : es) {
 
       switch (CompactionSettings.valueOf(entry.getKey())) {
+        case SF_NO_SAMPLE:
+          tests.add(new NoSampleTest());
+          break;
         case SF_LT_ESIZE_OPT:
           tests.add(new FileSizeTest(entry.getValue()) {
             @Override

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/scan/LookupTask.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/scan/LookupTask.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/scan/LookupTask.java
index 57a09ce..2d745cb 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/scan/LookupTask.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/scan/LookupTask.java
@@ -25,6 +25,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.impl.Translator;
 import org.apache.accumulo.core.client.impl.Translators;
 import org.apache.accumulo.core.conf.Property;
@@ -111,7 +112,7 @@ public class LookupTask extends ScanTask<MultiScanResult> {
             interruptFlag.set(true);
 
           lookupResult = tablet.lookup(entry.getValue(), session.columnSet, session.auths, results, maxResultsSize - bytesAdded, session.ssiList, session.ssio,
-              interruptFlag, session.batchTimeOut);
+              interruptFlag, session.samplerConfig, session.batchTimeOut);
 
           // if the tablet was closed it is possible that the
           // interrupt flag was set.... do not want it set for
@@ -163,6 +164,8 @@ public class LookupTask extends ScanTask<MultiScanResult> {
         log.warn("Iteration interrupted, when scan not cancelled", iie);
         addResult(iie);
       }
+    } catch (SampleNotPresentException e) {
+      addResult(e);
     } catch (Throwable e) {
       log.warn("exception while doing multi-scan ", e);
       addResult(e);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/scan/NextBatchTask.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/scan/NextBatchTask.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/scan/NextBatchTask.java
index e3f4146..ec28367 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/scan/NextBatchTask.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/scan/NextBatchTask.java
@@ -18,6 +18,7 @@ package org.apache.accumulo.tserver.scan;
 
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.iterators.IterationInterruptedException;
 import org.apache.accumulo.server.util.Halt;
 import org.apache.accumulo.tserver.TabletServer;
@@ -84,8 +85,8 @@ public class NextBatchTask extends ScanTask<ScanBatch> {
         log.warn("Iteration interrupted, when scan not cancelled", iie);
         addResult(iie);
       }
-    } catch (TooManyFilesException tmfe) {
-      addResult(tmfe);
+    } catch (TooManyFilesException | SampleNotPresentException e) {
+      addResult(e);
     } catch (OutOfMemoryError ome) {
       Halt.halt("Ran out of memory scanning " + scanSession.extent + " for " + scanSession.client);
       addResult(ome);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/session/MultiScanSession.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/session/MultiScanSession.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/session/MultiScanSession.java
index fccac47..16fc218 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/session/MultiScanSession.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/session/MultiScanSession.java
@@ -20,6 +20,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Column;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.impl.KeyExtent;
@@ -36,6 +37,7 @@ public class MultiScanSession extends Session {
   public final List<IterInfo> ssiList;
   public final Map<String,Map<String,String>> ssio;
   public final Authorizations auths;
+  public final SamplerConfiguration samplerConfig;
   public final long batchTimeOut;
 
   // stats
@@ -47,13 +49,14 @@ public class MultiScanSession extends Session {
   public volatile ScanTask<MultiScanResult> lookupTask;
 
   public MultiScanSession(TCredentials credentials, KeyExtent threadPoolExtent, Map<KeyExtent,List<Range>> queries, List<IterInfo> ssiList,
-      Map<String,Map<String,String>> ssio, Authorizations authorizations, long batchTimeOut) {
+      Map<String,Map<String,String>> ssio, Authorizations authorizations, SamplerConfiguration samplerConfig, long batchTimeOut) {
     super(credentials);
     this.queries = queries;
     this.ssiList = ssiList;
     this.ssio = ssio;
     this.auths = authorizations;
     this.threadPoolExtent = threadPoolExtent;
+    this.samplerConfig = samplerConfig;
     this.batchTimeOut = batchTimeOut;
   }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java
index 853714a..72c289c 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Column;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
@@ -42,6 +43,7 @@ import org.apache.accumulo.core.iterators.system.SourceSwitchingIterator.DataSou
 import org.apache.accumulo.core.iterators.system.StatsIterator;
 import org.apache.accumulo.core.iterators.system.VisibilityFilter;
 import org.apache.accumulo.core.metadata.schema.DataFileValue;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.Pair;
 import org.apache.accumulo.server.fs.FileRef;
@@ -50,6 +52,8 @@ import org.apache.accumulo.tserver.InMemoryMap.MemoryIterator;
 import org.apache.accumulo.tserver.TabletIteratorEnvironment;
 import org.apache.accumulo.tserver.TabletServer;
 
+import com.google.common.collect.Iterables;
+
 class ScanDataSource implements DataSource {
 
   // data source state
@@ -65,10 +69,10 @@ class ScanDataSource implements DataSource {
   private final ScanOptions options;
 
   ScanDataSource(Tablet tablet, Authorizations authorizations, byte[] defaultLabels, HashSet<Column> columnSet, List<IterInfo> ssiList,
-      Map<String,Map<String,String>> ssio, AtomicBoolean interruptFlag, long batchTimeOut) {
+      Map<String,Map<String,String>> ssio, AtomicBoolean interruptFlag, SamplerConfiguration samplerConfig, long batchTimeOut) {
     this.tablet = tablet;
     expectedDeletionCount = tablet.getDataSourceDeletions();
-    this.options = new ScanOptions(-1, authorizations, defaultLabels, columnSet, ssiList, ssio, interruptFlag, false, batchTimeOut);
+    this.options = new ScanOptions(-1, authorizations, defaultLabels, columnSet, ssiList, ssio, interruptFlag, false, samplerConfig, batchTimeOut);
     this.interruptFlag = interruptFlag;
   }
 
@@ -117,6 +121,8 @@ class ScanDataSource implements DataSource {
 
     Map<FileRef,DataFileValue> files;
 
+    SamplerConfigurationImpl samplerConfig = options.getSamplerConfigurationImpl();
+
     synchronized (tablet) {
 
       if (memIters != null)
@@ -141,26 +147,26 @@ class ScanDataSource implements DataSource {
       // getIterators() throws an exception
       expectedDeletionCount = tablet.getDataSourceDeletions();
 
-      memIters = tablet.getTabletMemory().getIterators();
+      memIters = tablet.getTabletMemory().getIterators(samplerConfig);
       Pair<Long,Map<FileRef,DataFileValue>> reservation = tablet.getDatafileManager().reserveFilesForScan();
       fileReservationId = reservation.getFirst();
       files = reservation.getSecond();
     }
 
-    Collection<InterruptibleIterator> mapfiles = fileManager.openFiles(files, options.isIsolated());
+    Collection<InterruptibleIterator> mapfiles = fileManager.openFiles(files, options.isIsolated(), samplerConfig);
+
+    for (SortedKeyValueIterator<Key,Value> skvi : Iterables.concat(mapfiles, memIters))
+      ((InterruptibleIterator) skvi).setInterruptFlag(interruptFlag);
 
     List<SortedKeyValueIterator<Key,Value>> iters = new ArrayList<SortedKeyValueIterator<Key,Value>>(mapfiles.size() + memIters.size());
 
     iters.addAll(mapfiles);
     iters.addAll(memIters);
 
-    for (SortedKeyValueIterator<Key,Value> skvi : iters)
-      ((InterruptibleIterator) skvi).setInterruptFlag(interruptFlag);
-
     MultiIterator multiIter = new MultiIterator(iters, tablet.getExtent());
 
     TabletIteratorEnvironment iterEnv = new TabletIteratorEnvironment(IteratorScope.scan, tablet.getTableConfiguration(), fileManager, files,
-        options.getAuthorizations());
+        options.getAuthorizations(), samplerConfig);
 
     statsIterator = new StatsIterator(multiIter, TabletServer.seekCount, tablet.getScannedCounter());
 
@@ -212,7 +218,7 @@ class ScanDataSource implements DataSource {
 
   public void reattachFileManager() throws IOException {
     if (fileManager != null)
-      fileManager.reattach();
+      fileManager.reattach(options.getSamplerConfigurationImpl());
   }
 
   public void detachFileManager() {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanOptions.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanOptions.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanOptions.java
index 2a38fbd..c97f3ac 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanOptions.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanOptions.java
@@ -21,8 +21,10 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Column;
 import org.apache.accumulo.core.data.thrift.IterInfo;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 
 final class ScanOptions {
@@ -35,10 +37,11 @@ final class ScanOptions {
   private final AtomicBoolean interruptFlag;
   private final int num;
   private final boolean isolated;
+  private SamplerConfiguration samplerConfig;
   private final long batchTimeOut;
 
   ScanOptions(int num, Authorizations authorizations, byte[] defaultLabels, Set<Column> columnSet, List<IterInfo> ssiList, Map<String,Map<String,String>> ssio,
-      AtomicBoolean interruptFlag, boolean isolated, long batchTimeOut) {
+      AtomicBoolean interruptFlag, boolean isolated, SamplerConfiguration samplerConfig, long batchTimeOut) {
     this.num = num;
     this.authorizations = authorizations;
     this.defaultLabels = defaultLabels;
@@ -47,6 +50,7 @@ final class ScanOptions {
     this.ssio = ssio;
     this.interruptFlag = interruptFlag;
     this.isolated = isolated;
+    this.samplerConfig = samplerConfig;
     this.batchTimeOut = batchTimeOut;
   }
 
@@ -82,6 +86,16 @@ final class ScanOptions {
     return isolated;
   }
 
+  public SamplerConfiguration getSamplerConfiguration() {
+    return samplerConfig;
+  }
+
+  public SamplerConfigurationImpl getSamplerConfigurationImpl() {
+    if (samplerConfig == null)
+      return null;
+    return new SamplerConfigurationImpl(samplerConfig);
+  }
+
   public long getBatchTimeOut() {
     return batchTimeOut;
   }
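
ScanOptions stores the public-API SamplerConfiguration and converts it to the internal SamplerConfigurationImpl only on demand, passing null straight through so "no sampling" never needs a sentinel object. The same null-propagating adapter shape in miniature (both config types here are illustrative stand-ins):

    public class NullSafeAdapt {
      static class PublicConfig {
        final String sampler;
        PublicConfig(String s) { sampler = s; }
      }

      static class InternalConfig {
        final String sampler;
        InternalConfig(String s) { sampler = s; }
        @Override public String toString() { return "InternalConfig(" + sampler + ")"; }
      }

      /** Null means "sampling not requested"; preserve that rather than wrapping it. */
      static InternalConfig toInternal(PublicConfig pub) {
        if (pub == null)
          return null;
        return new InternalConfig(pub.sampler);
      }

      public static void main(String[] args) {
        System.out.println(toInternal(new PublicConfig("RowSampler"))); // InternalConfig(RowSampler)
        System.out.println(toInternal(null));                           // null
      }
    }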

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
index b8c260d..1f66302 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
@@ -51,6 +51,7 @@ import org.apache.accumulo.core.Constants;
 import org.apache.accumulo.core.client.Durability;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.admin.CompactionStrategyConfig;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.DurabilityImpl;
 import org.apache.accumulo.core.client.impl.Tables;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
@@ -632,7 +633,8 @@ public class Tablet implements TabletCommitter {
   }
 
   public LookupResult lookup(List<Range> ranges, HashSet<Column> columns, Authorizations authorizations, List<KVEntry> results, long maxResultSize,
-      List<IterInfo> ssiList, Map<String,Map<String,String>> ssio, AtomicBoolean interruptFlag, long batchTimeOut) throws IOException {
+      List<IterInfo> ssiList, Map<String,Map<String,String>> ssio, AtomicBoolean interruptFlag, SamplerConfiguration samplerConfig, long batchTimeOut)
+      throws IOException {
 
     if (ranges.size() == 0) {
       return new LookupResult();
@@ -650,7 +652,8 @@ public class Tablet implements TabletCommitter {
       tabletRange.clip(range);
     }
 
-    ScanDataSource dataSource = new ScanDataSource(this, authorizations, this.defaultSecurityLabel, columns, ssiList, ssio, interruptFlag, batchTimeOut);
+    ScanDataSource dataSource = new ScanDataSource(this, authorizations, this.defaultSecurityLabel, columns, ssiList, ssio, interruptFlag, samplerConfig,
+        batchTimeOut);
 
     LookupResult result = null;
 
@@ -754,12 +757,13 @@ public class Tablet implements TabletCommitter {
   }
 
   public Scanner createScanner(Range range, int num, Set<Column> columns, Authorizations authorizations, List<IterInfo> ssiList,
-      Map<String,Map<String,String>> ssio, boolean isolated, AtomicBoolean interruptFlag, long batchTimeOut) {
+      Map<String,Map<String,String>> ssio, boolean isolated, AtomicBoolean interruptFlag, SamplerConfiguration samplerConfig, long batchTimeOut) {
     // do a test to see if this range falls within the tablet, if it does not
     // then clip will throw an exception
     extent.toDataRange().clip(range);
 
-    ScanOptions opts = new ScanOptions(num, authorizations, this.defaultSecurityLabel, columns, ssiList, ssio, interruptFlag, isolated, batchTimeOut);
+    ScanOptions opts = new ScanOptions(num, authorizations, this.defaultSecurityLabel, columns, ssiList, ssio, interruptFlag, isolated, samplerConfig,
+        batchTimeOut);
     return new Scanner(this, range, opts);
   }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/TabletMemory.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/TabletMemory.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/TabletMemory.java
index 0b39d40..86cc262 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/TabletMemory.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/TabletMemory.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
 import org.apache.accumulo.tserver.InMemoryMap;
 import org.apache.accumulo.tserver.InMemoryMap.MemoryIterator;
@@ -156,11 +157,11 @@ class TabletMemory implements Closeable {
     tablet.updateMemoryUsageStats(memTable.estimatedSizeInBytes(), other);
   }
 
-  public List<MemoryIterator> getIterators() {
+  public List<MemoryIterator> getIterators(SamplerConfigurationImpl samplerConfig) {
     List<MemoryIterator> toReturn = new ArrayList<MemoryIterator>(2);
-    toReturn.add(memTable.skvIterator());
+    toReturn.add(memTable.skvIterator(samplerConfig));
     if (otherMemTable != null)
-      toReturn.add(otherMemTable.skvIterator());
+      toReturn.add(otherMemTable.skvIterator(samplerConfig));
     return toReturn;
   }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java b/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
index da7157a..7b4d447 100644
--- a/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
+++ b/server/tserver/src/test/java/org/apache/accumulo/tserver/InMemoryMapTest.java
@@ -26,16 +26,22 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
+import java.util.TreeMap;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.accumulo.core.client.SampleNotPresentException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment;
+import org.apache.accumulo.core.conf.ConfigurationCopy;
+import org.apache.accumulo.core.conf.DefaultConfiguration;
+import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.ArrayByteSequence;
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
@@ -45,21 +51,56 @@ import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.iterators.IterationInterruptedException;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
+import org.apache.accumulo.core.sample.RowSampler;
+import org.apache.accumulo.core.sample.Sampler;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
+import org.apache.accumulo.core.sample.impl.SamplerFactory;
 import org.apache.accumulo.core.util.LocalityGroupUtil;
+import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
 import org.apache.accumulo.server.client.HdfsZooInstance;
 import org.apache.accumulo.server.conf.ZooConfiguration;
 import org.apache.accumulo.tserver.InMemoryMap.MemoryIterator;
 import org.apache.hadoop.io.Text;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 import org.junit.rules.TemporaryFolder;
 
+import com.google.common.collect.ImmutableMap;
+
 public class InMemoryMapTest {
 
+  private static class SampleIE extends BaseIteratorEnvironment {
+
+    private final SamplerConfiguration sampleConfig;
+
+    public SampleIE() {
+      this.sampleConfig = null;
+    }
+
+    public SampleIE(SamplerConfigurationImpl sampleConfig) {
+      this.sampleConfig = sampleConfig.toSamplerConfiguration();
+    }
+
+    @Override
+    public boolean isSamplingEnabled() {
+      return sampleConfig != null;
+    }
+
+    @Override
+    public SamplerConfiguration getSamplerConfiguration() {
+      return sampleConfig;
+    }
+  }
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
   @BeforeClass
   public static void setUp() throws Exception {
     // suppress log messages having to do with not having an instance
@@ -101,20 +142,42 @@ public class InMemoryMapTest {
   }
 
   static Set<ByteSequence> newCFSet(String... cfs) {
-    HashSet<ByteSequence> cfSet = new HashSet<ByteSequence>();
+    HashSet<ByteSequence> cfSet = new HashSet<>();
     for (String cf : cfs) {
       cfSet.add(new ArrayByteSequence(cf));
     }
     return cfSet;
   }
 
+  static Set<Text> toTextSet(String... cfs) {
+    HashSet<Text> cfSet = new HashSet<>();
+    for (String cf : cfs) {
+      cfSet.add(new Text(cf));
+    }
+    return cfSet;
+  }
+
+  static ConfigurationCopy newConfig(String memDumpDir) {
+    ConfigurationCopy config = new ConfigurationCopy(DefaultConfiguration.getInstance());
+    config.set(Property.TSERV_NATIVEMAP_ENABLED, "" + false);
+    config.set(Property.TSERV_MEMDUMP_DIR, memDumpDir);
+    return config;
+  }
+
+  static InMemoryMap newInMemoryMap(boolean useNative, String memDumpDir) throws LocalityGroupConfigurationError {
+    ConfigurationCopy config = new ConfigurationCopy(DefaultConfiguration.getInstance());
+    config.set(Property.TSERV_NATIVEMAP_ENABLED, "" + useNative);
+    config.set(Property.TSERV_MEMDUMP_DIR, memDumpDir);
+    return new InMemoryMap(config);
+  }
+
   @Test
   public void test2() throws Exception {
-    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 
-    MemoryIterator ski1 = imm.skvIterator();
+    MemoryIterator ski1 = imm.skvIterator(null);
     mutate(imm, "r1", "foo:cq1", 3, "bar1");
-    MemoryIterator ski2 = imm.skvIterator();
+    MemoryIterator ski2 = imm.skvIterator(null);
 
     ski1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
     assertFalse(ski1.hasTop());
@@ -128,17 +191,17 @@ public class InMemoryMapTest {
 
   @Test
   public void test3() throws Exception {
-    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 
     mutate(imm, "r1", "foo:cq1", 3, "bar1");
     mutate(imm, "r1", "foo:cq1", 3, "bar2");
-    MemoryIterator ski1 = imm.skvIterator();
+    MemoryIterator ski1 = imm.skvIterator(null);
     mutate(imm, "r1", "foo:cq1", 3, "bar3");
 
     mutate(imm, "r3", "foo:cq1", 3, "bar9");
     mutate(imm, "r3", "foo:cq1", 3, "bara");
 
-    MemoryIterator ski2 = imm.skvIterator();
+    MemoryIterator ski2 = imm.skvIterator(null);
 
     ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
     ae(ski1, "r1", "foo:cq1", 3, "bar2");
@@ -154,11 +217,11 @@ public class InMemoryMapTest {
 
   @Test
   public void test4() throws Exception {
-    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 
     mutate(imm, "r1", "foo:cq1", 3, "bar1");
     mutate(imm, "r1", "foo:cq1", 3, "bar2");
-    MemoryIterator ski1 = imm.skvIterator();
+    MemoryIterator ski1 = imm.skvIterator(null);
     mutate(imm, "r1", "foo:cq1", 3, "bar3");
 
     imm.delete(0);
@@ -186,13 +249,13 @@ public class InMemoryMapTest {
 
   @Test
   public void test5() throws Exception {
-    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 
     mutate(imm, "r1", "foo:cq1", 3, "bar1");
     mutate(imm, "r1", "foo:cq1", 3, "bar2");
     mutate(imm, "r1", "foo:cq1", 3, "bar3");
 
-    MemoryIterator ski1 = imm.skvIterator();
+    MemoryIterator ski1 = imm.skvIterator(null);
     ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
     ae(ski1, "r1", "foo:cq1", 3, "bar3");
 
@@ -204,13 +267,13 @@ public class InMemoryMapTest {
 
     ski1.close();
 
-    imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+    imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 
     mutate(imm, "r1", "foo:cq1", 3, "bar1");
     mutate(imm, "r1", "foo:cq2", 3, "bar2");
     mutate(imm, "r1", "foo:cq3", 3, "bar3");
 
-    ski1 = imm.skvIterator();
+    ski1 = imm.skvIterator(null);
     ski1.seek(new Range(new Text("r1")), LocalityGroupUtil.EMPTY_CF_SET, false);
     ae(ski1, "r1", "foo:cq1", 3, "bar1");
 
@@ -225,18 +288,18 @@ public class InMemoryMapTest {
 
   @Test
   public void test6() throws Exception {
-    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 
     mutate(imm, "r1", "foo:cq1", 3, "bar1");
     mutate(imm, "r1", "foo:cq2", 3, "bar2");
     mutate(imm, "r1", "foo:cq3", 3, "bar3");
     mutate(imm, "r1", "foo:cq4", 3, "bar4");
 
-    MemoryIterator ski1 = imm.skvIterator();
+    MemoryIterator ski1 = imm.skvIterator(null);
 
     mutate(imm, "r1", "foo:cq5", 3, "bar5");
 
-    SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(null);
+    SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(new SampleIE());
 
     ski1.seek(new Range(nk("r1", "foo:cq1", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
     ae(ski1, "r1", "foo:cq1", 3, "bar1");
@@ -271,12 +334,12 @@ public class InMemoryMapTest {
   private void deepCopyAndDelete(int interleaving, boolean interrupt) throws Exception {
     // interleaving == 0 intentionally omitted, this runs the test w/o deleting in mem map
 
-    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 
     mutate(imm, "r1", "foo:cq1", 3, "bar1");
     mutate(imm, "r1", "foo:cq2", 3, "bar2");
 
-    MemoryIterator ski1 = imm.skvIterator();
+    MemoryIterator ski1 = imm.skvIterator(null);
 
     AtomicBoolean iflag = new AtomicBoolean(false);
     ski1.setInterruptFlag(iflag);
@@ -287,7 +350,7 @@ public class InMemoryMapTest {
         iflag.set(true);
     }
 
-    SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(null);
+    SortedKeyValueIterator<Key,Value> dc = ski1.deepCopy(new SampleIE());
 
     if (interleaving == 2) {
       imm.delete(0);
@@ -338,7 +401,7 @@ public class InMemoryMapTest {
 
   @Test
   public void testBug1() throws Exception {
-    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 
     for (int i = 0; i < 20; i++) {
       mutate(imm, "r1", "foo:cq" + i, 3, "bar" + i);
@@ -348,7 +411,7 @@ public class InMemoryMapTest {
       mutate(imm, "r2", "foo:cq" + i, 3, "bar" + i);
     }
 
-    MemoryIterator ski1 = imm.skvIterator();
+    MemoryIterator ski1 = imm.skvIterator(null);
     ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(ski1);
 
     imm.delete(0);
@@ -366,14 +429,14 @@ public class InMemoryMapTest {
 
   @Test
   public void testSeekBackWards() throws Exception {
-    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 
     mutate(imm, "r1", "foo:cq1", 3, "bar1");
     mutate(imm, "r1", "foo:cq2", 3, "bar2");
     mutate(imm, "r1", "foo:cq3", 3, "bar3");
     mutate(imm, "r1", "foo:cq4", 3, "bar4");
 
-    MemoryIterator skvi1 = imm.skvIterator();
+    MemoryIterator skvi1 = imm.skvIterator(null);
 
     skvi1.seek(new Range(nk("r1", "foo:cq3", 3), null), LocalityGroupUtil.EMPTY_CF_SET, false);
     ae(skvi1, "r1", "foo:cq3", 3, "bar3");
@@ -385,14 +448,14 @@ public class InMemoryMapTest {
 
   @Test
   public void testDuplicateKey() throws Exception {
-    InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
 
     Mutation m = new Mutation(new Text("r1"));
     m.put(new Text("foo"), new Text("cq"), 3, new Value("v1".getBytes()));
     m.put(new Text("foo"), new Text("cq"), 3, new Value("v2".getBytes()));
     imm.mutate(Collections.singletonList(m));
 
-    MemoryIterator skvi1 = imm.skvIterator();
+    MemoryIterator skvi1 = imm.skvIterator(null);
     skvi1.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
     ae(skvi1, "r1", "foo:cq", 3, "v2");
     ae(skvi1, "r1", "foo:cq", 3, "v1");
@@ -410,12 +473,12 @@ public class InMemoryMapTest {
   // - hard to get this timing test to run well on apache build machines
   @Test
   @Ignore
-  public void parallelWriteSpeed() throws InterruptedException, IOException {
+  public void parallelWriteSpeed() throws Exception {
     List<Double> timings = new ArrayList<Double>();
     for (int threads : new int[] {1, 2, 16, /* 64, 256 */}) {
       final long now = System.currentTimeMillis();
       final long counts[] = new long[threads];
-      final InMemoryMap imm = new InMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+      final InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
       ExecutorService e = Executors.newFixedThreadPool(threads);
       for (int j = 0; j < threads; j++) {
         final int threadId = j;
@@ -451,12 +514,12 @@ public class InMemoryMapTest {
 
   @Test
   public void testLocalityGroups() throws Exception {
+    ConfigurationCopy config = newConfig(tempFolder.newFolder().getAbsolutePath());
+    config.set(Property.TABLE_LOCALITY_GROUP_PREFIX + "lg1", LocalityGroupUtil.encodeColumnFamilies(toTextSet("cf1", "cf2")));
+    config.set(Property.TABLE_LOCALITY_GROUP_PREFIX + "lg2", LocalityGroupUtil.encodeColumnFamilies(toTextSet("cf3", "cf4")));
+    config.set(Property.TABLE_LOCALITY_GROUPS.getKey(), "lg1,lg2");
 
-    Map<String,Set<ByteSequence>> lggroups1 = new HashMap<String,Set<ByteSequence>>();
-    lggroups1.put("lg1", newCFSet("cf1", "cf2"));
-    lggroups1.put("lg2", newCFSet("cf3", "cf4"));
-
-    InMemoryMap imm = new InMemoryMap(lggroups1, false, tempFolder.newFolder().getAbsolutePath());
+    InMemoryMap imm = new InMemoryMap(config);
 
     Mutation m1 = new Mutation("r1");
     m1.put("cf1", "x", 2, "1");
@@ -480,10 +543,10 @@ public class InMemoryMapTest {
 
     imm.mutate(Arrays.asList(m1, m2, m3, m4, m5));
 
-    MemoryIterator iter1 = imm.skvIterator();
+    MemoryIterator iter1 = imm.skvIterator(null);
 
     seekLocalityGroups(iter1);
-    SortedKeyValueIterator<Key,Value> dc1 = iter1.deepCopy(null);
+    SortedKeyValueIterator<Key,Value> dc1 = iter1.deepCopy(new SampleIE());
     seekLocalityGroups(dc1);
 
     assertTrue(imm.getNumEntries() == 10);
@@ -497,6 +560,254 @@ public class InMemoryMapTest {
     // seekLocalityGroups(iter1.deepCopy(null));
   }
 
+  @Test
+  public void testSample() throws Exception {
+
+    SamplerConfigurationImpl sampleConfig = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "7"));
+    Sampler sampler = SamplerFactory.newSampler(sampleConfig, DefaultConfiguration.getInstance());
+
+    ConfigurationCopy config1 = newConfig(tempFolder.newFolder().getAbsolutePath());
+    for (Entry<String,String> entry : sampleConfig.toTablePropertiesMap().entrySet()) {
+      config1.set(entry.getKey(), entry.getValue());
+    }
+
+    ConfigurationCopy config2 = newConfig(tempFolder.newFolder().getAbsolutePath());
+    config2.set(Property.TABLE_LOCALITY_GROUP_PREFIX + "lg1", LocalityGroupUtil.encodeColumnFamilies(toTextSet("cf2")));
+    config2.set(Property.TABLE_LOCALITY_GROUPS.getKey(), "lg1");
+    for (Entry<String,String> entry : sampleConfig.toTablePropertiesMap().entrySet()) {
+      config2.set(entry.getKey(), entry.getValue());
+    }
+
+    for (ConfigurationCopy config : Arrays.asList(config1, config2)) {
+
+      InMemoryMap imm = new InMemoryMap(config);
+
+      TreeMap<Key,Value> expectedSample = new TreeMap<>();
+      TreeMap<Key,Value> expectedAll = new TreeMap<>();
+      TreeMap<Key,Value> expectedNone = new TreeMap<>();
+
+      MemoryIterator iter0 = imm.skvIterator(sampleConfig);
+
+      for (int r = 0; r < 100; r++) {
+        String row = String.format("r%06d", r);
+        mutate(imm, row, "cf1:cq1", 5, "v" + (2 * r), sampler, expectedSample, expectedAll);
+        mutate(imm, row, "cf2:cq2", 5, "v" + ((2 * r) + 1), sampler, expectedSample, expectedAll);
+      }
+
+      assertTrue(expectedSample.size() > 0);
+
+      MemoryIterator iter1 = imm.skvIterator(sampleConfig);
+      MemoryIterator iter2 = imm.skvIterator(null);
+      SortedKeyValueIterator<Key,Value> iter0dc1 = iter0.deepCopy(new SampleIE());
+      SortedKeyValueIterator<Key,Value> iter0dc2 = iter0.deepCopy(new SampleIE(sampleConfig));
+      SortedKeyValueIterator<Key,Value> iter1dc1 = iter1.deepCopy(new SampleIE());
+      SortedKeyValueIterator<Key,Value> iter1dc2 = iter1.deepCopy(new SampleIE(sampleConfig));
+      SortedKeyValueIterator<Key,Value> iter2dc1 = iter2.deepCopy(new SampleIE());
+      SortedKeyValueIterator<Key,Value> iter2dc2 = iter2.deepCopy(new SampleIE(sampleConfig));
+
+      assertEquals(expectedNone, readAll(iter0));
+      assertEquals(expectedNone, readAll(iter0dc1));
+      assertEquals(expectedNone, readAll(iter0dc2));
+      assertEquals(expectedSample, readAll(iter1));
+      assertEquals(expectedAll, readAll(iter2));
+      assertEquals(expectedAll, readAll(iter1dc1));
+      assertEquals(expectedAll, readAll(iter2dc1));
+      assertEquals(expectedSample, readAll(iter1dc2));
+      assertEquals(expectedSample, readAll(iter2dc2));
+
+      imm.delete(0);
+
+      assertEquals(expectedNone, readAll(iter0));
+      assertEquals(expectedNone, readAll(iter0dc1));
+      assertEquals(expectedNone, readAll(iter0dc2));
+      assertEquals(expectedSample, readAll(iter1));
+      assertEquals(expectedAll, readAll(iter2));
+      assertEquals(expectedAll, readAll(iter1dc1));
+      assertEquals(expectedAll, readAll(iter2dc1));
+      assertEquals(expectedSample, readAll(iter1dc2));
+      assertEquals(expectedSample, readAll(iter2dc2));
+
+      SortedKeyValueIterator<Key,Value> iter0dc3 = iter0.deepCopy(new SampleIE());
+      SortedKeyValueIterator<Key,Value> iter0dc4 = iter0.deepCopy(new SampleIE(sampleConfig));
+      SortedKeyValueIterator<Key,Value> iter1dc3 = iter1.deepCopy(new SampleIE());
+      SortedKeyValueIterator<Key,Value> iter1dc4 = iter1.deepCopy(new SampleIE(sampleConfig));
+      SortedKeyValueIterator<Key,Value> iter2dc3 = iter2.deepCopy(new SampleIE());
+      SortedKeyValueIterator<Key,Value> iter2dc4 = iter2.deepCopy(new SampleIE(sampleConfig));
+
+      assertEquals(expectedNone, readAll(iter0dc3));
+      assertEquals(expectedNone, readAll(iter0dc4));
+      assertEquals(expectedAll, readAll(iter1dc3));
+      assertEquals(expectedAll, readAll(iter2dc3));
+      assertEquals(expectedSample, readAll(iter1dc4));
+      assertEquals(expectedSample, readAll(iter2dc4));
+
+      iter1.close();
+      iter2.close();
+    }
+  }
+
+  @Test
+  public void testInterruptingSample() throws Exception {
+    runInterruptSampleTest(false, false, false);
+    runInterruptSampleTest(false, true, false);
+    runInterruptSampleTest(true, false, false);
+    runInterruptSampleTest(true, true, false);
+    runInterruptSampleTest(true, true, true);
+  }
+
+  private void runInterruptSampleTest(boolean deepCopy, boolean delete, boolean dcAfterDelete) throws Exception {
+    SamplerConfigurationImpl sampleConfig1 = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "2"));
+    Sampler sampler = SamplerFactory.newSampler(sampleConfig1, DefaultConfiguration.getInstance());
+
+    ConfigurationCopy config1 = newConfig(tempFolder.newFolder().getAbsolutePath());
+    for (Entry<String,String> entry : sampleConfig1.toTablePropertiesMap().entrySet()) {
+      config1.set(entry.getKey(), entry.getValue());
+    }
+
+    InMemoryMap imm = new InMemoryMap(config1);
+
+    TreeMap<Key,Value> expectedSample = new TreeMap<>();
+    TreeMap<Key,Value> expectedAll = new TreeMap<>();
+
+    for (int r = 0; r < 1000; r++) {
+      String row = String.format("r%06d", r);
+      mutate(imm, row, "cf1:cq1", 5, "v" + (2 * r), sampler, expectedSample, expectedAll);
+      mutate(imm, row, "cf2:cq2", 5, "v" + ((2 * r) + 1), sampler, expectedSample, expectedAll);
+    }
+
+    assertTrue(expectedSample.size() > 0);
+
+    MemoryIterator miter = imm.skvIterator(sampleConfig1);
+    AtomicBoolean iFlag = new AtomicBoolean(false);
+    miter.setInterruptFlag(iFlag);
+    SortedKeyValueIterator<Key,Value> iter = miter;
+
+    if (delete && !dcAfterDelete) {
+      imm.delete(0);
+    }
+
+    if (deepCopy) {
+      iter = iter.deepCopy(new SampleIE(sampleConfig1));
+    }
+
+    if (delete && dcAfterDelete) {
+      imm.delete(0);
+    }
+
+    assertEquals(expectedSample, readAll(iter));
+    iFlag.set(true);
+    try {
+      readAll(iter);
+      Assert.fail();
+    } catch (IterationInterruptedException iie) {}
+
+    miter.close();
+  }
+
+  private void mutate(InMemoryMap imm, String row, String cols, int ts, String val, Sampler sampler, TreeMap<Key,Value> expectedSample,
+      TreeMap<Key,Value> expectedAll) {
+    mutate(imm, row, cols, ts, val);
+    Key k1 = nk(row, cols, ts);
+    if (sampler.accept(k1)) {
+      expectedSample.put(k1, new Value(val.getBytes()));
+    }
+    expectedAll.put(k1, new Value(val.getBytes()));
+  }
+
+  @Test(expected = SampleNotPresentException.class)
+  public void testDifferentSampleConfig() throws Exception {
+    SamplerConfigurationImpl sampleConfig = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "7"));
+
+    ConfigurationCopy config1 = newConfig(tempFolder.newFolder().getAbsolutePath());
+    for (Entry<String,String> entry : sampleConfig.toTablePropertiesMap().entrySet()) {
+      config1.set(entry.getKey(), entry.getValue());
+    }
+
+    InMemoryMap imm = new InMemoryMap(config1);
+
+    mutate(imm, "r", "cf:cq", 5, "b");
+
+    SamplerConfigurationImpl sampleConfig2 = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "9"));
+    MemoryIterator iter = imm.skvIterator(sampleConfig2);
+    iter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
+  }
+
+  @Test(expected = SampleNotPresentException.class)
+  public void testNoSampleConfig() throws Exception {
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+
+    mutate(imm, "r", "cf:cq", 5, "b");
+
+    SamplerConfigurationImpl sampleConfig2 = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "9"));
+    MemoryIterator iter = imm.skvIterator(sampleConfig2);
+    iter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
+  }
+
+  @Test
+  public void testEmptyNoSampleConfig() throws Exception {
+    InMemoryMap imm = newInMemoryMap(false, tempFolder.newFolder().getAbsolutePath());
+
+    SamplerConfigurationImpl sampleConfig2 = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "9"));
+
+    // when the in-memory map is empty, it should be possible to get a sample iterator with any sample config
+    MemoryIterator iter = imm.skvIterator(sampleConfig2);
+    iter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
+    Assert.assertFalse(iter.hasTop());
+  }
+
+  @Test
+  public void testDeferredSamplerCreation() throws Exception {
+    SamplerConfigurationImpl sampleConfig1 = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "9"));
+
+    ConfigurationCopy config1 = newConfig(tempFolder.newFolder().getAbsolutePath());
+    for (Entry<String,String> entry : sampleConfig1.toTablePropertiesMap().entrySet()) {
+      config1.set(entry.getKey(), entry.getValue());
+    }
+
+    InMemoryMap imm = new InMemoryMap(config1);
+
+    // change the sampler config after creating the in-memory map.
+    SamplerConfigurationImpl sampleConfig2 = new SamplerConfigurationImpl(RowSampler.class.getName(), ImmutableMap.of("hasher", "murmur3_32", "modulus", "7"));
+    for (Entry<String,String> entry : sampleConfig2.toTablePropertiesMap().entrySet()) {
+      config1.set(entry.getKey(), entry.getValue());
+    }
+
+    TreeMap<Key,Value> expectedSample = new TreeMap<>();
+    TreeMap<Key,Value> expectedAll = new TreeMap<>();
+    Sampler sampler = SamplerFactory.newSampler(sampleConfig2, config1);
+
+    for (int i = 0; i < 100; i++) {
+      mutate(imm, "r" + i, "cf:cq", 5, "v" + i, sampler, expectedSample, expectedAll);
+    }
+
+    MemoryIterator iter = imm.skvIterator(sampleConfig2);
+    iter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
+    Assert.assertEquals(expectedSample, readAll(iter));
+
+    SortedKeyValueIterator<Key,Value> dc = iter.deepCopy(new SampleIE(sampleConfig2));
+    dc.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
+    Assert.assertEquals(expectedSample, readAll(dc));
+
+    iter = imm.skvIterator(null);
+    iter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
+    Assert.assertEquals(expectedAll, readAll(iter));
+
+    iter = imm.skvIterator(sampleConfig1);
+    thrown.expect(SampleNotPresentException.class);
+    iter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
+  }
+
+  private TreeMap<Key,Value> readAll(SortedKeyValueIterator<Key,Value> iter) throws IOException {
+    iter.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
+
+    TreeMap<Key,Value> actual = new TreeMap<>();
+    while (iter.hasTop()) {
+      actual.put(iter.getTopKey(), iter.getTopValue());
+      iter.next();
+    }
+    return actual;
+  }
+
   private void seekLocalityGroups(SortedKeyValueIterator<Key,Value> iter1) throws IOException {
     iter1.seek(new Range(), newCFSet("cf1"), true);
     ae(iter1, "r1", "cf1:x", 2, "1");
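
The tests above pin down the sampling contract of InMemoryMap.skvIterator(): a null config returns all data, a matching config returns only the sample, and a mismatched config throws SampleNotPresentException. For reference, a minimal sketch of the client-side analog (assuming the public SamplerConfiguration and ScannerBase.setSamplerConfiguration API introduced elsewhere in this change, a Connector named conn, and an illustrative table name; the RowSampler package and option names are carried over from the tests):

    // Assumed client API: restrict a scan to the stored sample.
    SamplerConfiguration sc = new SamplerConfiguration("org.apache.accumulo.core.client.sample.RowSampler");
    sc.addOption("hasher", "murmur3_32");
    sc.addOption("modulus", "7");

    Scanner scanner = conn.createScanner("mytable", Authorizations.EMPTY);
    scanner.setSamplerConfiguration(sc);
    // If a tablet's files or in-memory map hold no sample matching sc, the
    // scan fails with SampleNotPresentException, mirroring the tests above.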

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/DefaultCompactionStrategyTest.java
----------------------------------------------------------------------
diff --git a/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/DefaultCompactionStrategyTest.java b/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/DefaultCompactionStrategyTest.java
index 55226fb..0388c1f 100644
--- a/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/DefaultCompactionStrategyTest.java
+++ b/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/DefaultCompactionStrategyTest.java
@@ -41,6 +41,7 @@ import org.apache.accumulo.core.file.NoSuchMetaStoreException;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.metadata.schema.DataFileValue;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.util.Pair;
 import org.apache.accumulo.server.fs.FileRef;
 import org.apache.hadoop.io.Text;
@@ -133,6 +134,11 @@ public class DefaultCompactionStrategyTest {
     @Override
     public void close() throws IOException {}
 
+    @Override
+    public FileSKVIterator getSample(SamplerConfigurationImpl sampleConfig) {
+      return null;
+    }
+
   }
 
   static final DefaultConfiguration dfault = AccumuloConfiguration.getDefaultConfiguration();


[5/7] accumulo git commit: ACCUMULO-3913 Added per table sampling

Posted by kt...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TSampleNotPresentException.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TSampleNotPresentException.java b/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TSampleNotPresentException.java
new file mode 100644
index 0000000..c4ef7f3
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TSampleNotPresentException.java
@@ -0,0 +1,409 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Autogenerated by Thrift Compiler (0.9.1)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ *  @generated
+ */
+package org.apache.accumulo.core.tabletserver.thrift;
+
+import org.apache.thrift.scheme.IScheme;
+import org.apache.thrift.scheme.SchemeFactory;
+import org.apache.thrift.scheme.StandardScheme;
+
+import org.apache.thrift.scheme.TupleScheme;
+import org.apache.thrift.protocol.TTupleProtocol;
+import org.apache.thrift.protocol.TProtocolException;
+import org.apache.thrift.EncodingUtils;
+import org.apache.thrift.TException;
+import org.apache.thrift.async.AsyncMethodCallback;
+import org.apache.thrift.server.AbstractNonblockingServer.*;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.EnumMap;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.EnumSet;
+import java.util.Collections;
+import java.util.BitSet;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@SuppressWarnings({"unchecked", "serial", "rawtypes", "unused"}) public class TSampleNotPresentException extends TException implements org.apache.thrift.TBase<TSampleNotPresentException, TSampleNotPresentException._Fields>, java.io.Serializable, Cloneable, Comparable<TSampleNotPresentException> {
+  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TSampleNotPresentException");
+
+  private static final org.apache.thrift.protocol.TField EXTENT_FIELD_DESC = new org.apache.thrift.protocol.TField("extent", org.apache.thrift.protocol.TType.STRUCT, (short)1);
+
+  private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
+  static {
+    schemes.put(StandardScheme.class, new TSampleNotPresentExceptionStandardSchemeFactory());
+    schemes.put(TupleScheme.class, new TSampleNotPresentExceptionTupleSchemeFactory());
+  }
+
+  public org.apache.accumulo.core.data.thrift.TKeyExtent extent; // required
+
+  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
+  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
+    EXTENT((short)1, "extent");
+
+    private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
+
+    static {
+      for (_Fields field : EnumSet.allOf(_Fields.class)) {
+        byName.put(field.getFieldName(), field);
+      }
+    }
+
+    /**
+     * Find the _Fields constant that matches fieldId, or null if its not found.
+     */
+    public static _Fields findByThriftId(int fieldId) {
+      switch(fieldId) {
+        case 1: // EXTENT
+          return EXTENT;
+        default:
+          return null;
+      }
+    }
+
+    /**
+     * Find the _Fields constant that matches fieldId, throwing an exception
+     * if it is not found.
+     */
+    public static _Fields findByThriftIdOrThrow(int fieldId) {
+      _Fields fields = findByThriftId(fieldId);
+      if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!");
+      return fields;
+    }
+
+    /**
+     * Find the _Fields constant that matches name, or null if its not found.
+     */
+    public static _Fields findByName(String name) {
+      return byName.get(name);
+    }
+
+    private final short _thriftId;
+    private final String _fieldName;
+
+    _Fields(short thriftId, String fieldName) {
+      _thriftId = thriftId;
+      _fieldName = fieldName;
+    }
+
+    public short getThriftFieldId() {
+      return _thriftId;
+    }
+
+    public String getFieldName() {
+      return _fieldName;
+    }
+  }
+
+  // isset id assignments
+  public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
+  static {
+    Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
+    tmpMap.put(_Fields.EXTENT, new org.apache.thrift.meta_data.FieldMetaData("extent", org.apache.thrift.TFieldRequirementType.DEFAULT, 
+        new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, org.apache.accumulo.core.data.thrift.TKeyExtent.class)));
+    metaDataMap = Collections.unmodifiableMap(tmpMap);
+    org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TSampleNotPresentException.class, metaDataMap);
+  }
+
+  public TSampleNotPresentException() {
+  }
+
+  public TSampleNotPresentException(
+    org.apache.accumulo.core.data.thrift.TKeyExtent extent)
+  {
+    this();
+    this.extent = extent;
+  }
+
+  /**
+   * Performs a deep copy on <i>other</i>.
+   */
+  public TSampleNotPresentException(TSampleNotPresentException other) {
+    if (other.isSetExtent()) {
+      this.extent = new org.apache.accumulo.core.data.thrift.TKeyExtent(other.extent);
+    }
+  }
+
+  public TSampleNotPresentException deepCopy() {
+    return new TSampleNotPresentException(this);
+  }
+
+  @Override
+  public void clear() {
+    this.extent = null;
+  }
+
+  public org.apache.accumulo.core.data.thrift.TKeyExtent getExtent() {
+    return this.extent;
+  }
+
+  public TSampleNotPresentException setExtent(org.apache.accumulo.core.data.thrift.TKeyExtent extent) {
+    this.extent = extent;
+    return this;
+  }
+
+  public void unsetExtent() {
+    this.extent = null;
+  }
+
+  /** Returns true if field extent is set (has been assigned a value) and false otherwise */
+  public boolean isSetExtent() {
+    return this.extent != null;
+  }
+
+  public void setExtentIsSet(boolean value) {
+    if (!value) {
+      this.extent = null;
+    }
+  }
+
+  public void setFieldValue(_Fields field, Object value) {
+    switch (field) {
+    case EXTENT:
+      if (value == null) {
+        unsetExtent();
+      } else {
+        setExtent((org.apache.accumulo.core.data.thrift.TKeyExtent)value);
+      }
+      break;
+
+    }
+  }
+
+  public Object getFieldValue(_Fields field) {
+    switch (field) {
+    case EXTENT:
+      return getExtent();
+
+    }
+    throw new IllegalStateException();
+  }
+
+  /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */
+  public boolean isSet(_Fields field) {
+    if (field == null) {
+      throw new IllegalArgumentException();
+    }
+
+    switch (field) {
+    case EXTENT:
+      return isSetExtent();
+    }
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public boolean equals(Object that) {
+    if (that == null)
+      return false;
+    if (that instanceof TSampleNotPresentException)
+      return this.equals((TSampleNotPresentException)that);
+    return false;
+  }
+
+  public boolean equals(TSampleNotPresentException that) {
+    if (that == null)
+      return false;
+
+    boolean this_present_extent = true && this.isSetExtent();
+    boolean that_present_extent = true && that.isSetExtent();
+    if (this_present_extent || that_present_extent) {
+      if (!(this_present_extent && that_present_extent))
+        return false;
+      if (!this.extent.equals(that.extent))
+        return false;
+    }
+
+    return true;
+  }
+
+  @Override
+  public int hashCode() {
+    return 0;
+  }
+
+  @Override
+  public int compareTo(TSampleNotPresentException other) {
+    if (!getClass().equals(other.getClass())) {
+      return getClass().getName().compareTo(other.getClass().getName());
+    }
+
+    int lastComparison = 0;
+
+    lastComparison = Boolean.valueOf(isSetExtent()).compareTo(other.isSetExtent());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetExtent()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.extent, other.extent);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
+    return 0;
+  }
+
+  public _Fields fieldForId(int fieldId) {
+    return _Fields.findByThriftId(fieldId);
+  }
+
+  public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException {
+    schemes.get(iprot.getScheme()).getScheme().read(iprot, this);
+  }
+
+  public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException {
+    schemes.get(oprot.getScheme()).getScheme().write(oprot, this);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder("TSampleNotPresentException(");
+    boolean first = true;
+
+    sb.append("extent:");
+    if (this.extent == null) {
+      sb.append("null");
+    } else {
+      sb.append(this.extent);
+    }
+    first = false;
+    sb.append(")");
+    return sb.toString();
+  }
+
+  public void validate() throws org.apache.thrift.TException {
+    // check for required fields
+    // check for sub-struct validity
+    if (extent != null) {
+      extent.validate();
+    }
+  }
+
+  private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException {
+    try {
+      write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out)));
+    } catch (org.apache.thrift.TException te) {
+      throw new java.io.IOException(te);
+    }
+  }
+
+  private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
+    try {
+      read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in)));
+    } catch (org.apache.thrift.TException te) {
+      throw new java.io.IOException(te);
+    }
+  }
+
+  private static class TSampleNotPresentExceptionStandardSchemeFactory implements SchemeFactory {
+    public TSampleNotPresentExceptionStandardScheme getScheme() {
+      return new TSampleNotPresentExceptionStandardScheme();
+    }
+  }
+
+  private static class TSampleNotPresentExceptionStandardScheme extends StandardScheme<TSampleNotPresentException> {
+
+    public void read(org.apache.thrift.protocol.TProtocol iprot, TSampleNotPresentException struct) throws org.apache.thrift.TException {
+      org.apache.thrift.protocol.TField schemeField;
+      iprot.readStructBegin();
+      while (true)
+      {
+        schemeField = iprot.readFieldBegin();
+        if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { 
+          break;
+        }
+        switch (schemeField.id) {
+          case 1: // EXTENT
+            if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) {
+              struct.extent = new org.apache.accumulo.core.data.thrift.TKeyExtent();
+              struct.extent.read(iprot);
+              struct.setExtentIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
+          default:
+            org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+        }
+        iprot.readFieldEnd();
+      }
+      iprot.readStructEnd();
+
+      // check for required fields of primitive type, which can't be checked in the validate method
+      struct.validate();
+    }
+
+    public void write(org.apache.thrift.protocol.TProtocol oprot, TSampleNotPresentException struct) throws org.apache.thrift.TException {
+      struct.validate();
+
+      oprot.writeStructBegin(STRUCT_DESC);
+      if (struct.extent != null) {
+        oprot.writeFieldBegin(EXTENT_FIELD_DESC);
+        struct.extent.write(oprot);
+        oprot.writeFieldEnd();
+      }
+      oprot.writeFieldStop();
+      oprot.writeStructEnd();
+    }
+
+  }
+
+  private static class TSampleNotPresentExceptionTupleSchemeFactory implements SchemeFactory {
+    public TSampleNotPresentExceptionTupleScheme getScheme() {
+      return new TSampleNotPresentExceptionTupleScheme();
+    }
+  }
+
+  private static class TSampleNotPresentExceptionTupleScheme extends TupleScheme<TSampleNotPresentException> {
+
+    @Override
+    public void write(org.apache.thrift.protocol.TProtocol prot, TSampleNotPresentException struct) throws org.apache.thrift.TException {
+      TTupleProtocol oprot = (TTupleProtocol) prot;
+      BitSet optionals = new BitSet();
+      if (struct.isSetExtent()) {
+        optionals.set(0);
+      }
+      oprot.writeBitSet(optionals, 1);
+      if (struct.isSetExtent()) {
+        struct.extent.write(oprot);
+      }
+    }
+
+    @Override
+    public void read(org.apache.thrift.protocol.TProtocol prot, TSampleNotPresentException struct) throws org.apache.thrift.TException {
+      TTupleProtocol iprot = (TTupleProtocol) prot;
+      BitSet incoming = iprot.readBitSet(1);
+      if (incoming.get(0)) {
+        struct.extent = new org.apache.accumulo.core.data.thrift.TKeyExtent();
+        struct.extent.read(iprot);
+        struct.setExtentIsSet(true);
+      }
+    }
+  }
+
+}
+

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TSamplerConfiguration.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TSamplerConfiguration.java b/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TSamplerConfiguration.java
new file mode 100644
index 0000000..2d2b2d5
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/tabletserver/thrift/TSamplerConfiguration.java
@@ -0,0 +1,556 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Autogenerated by Thrift Compiler (0.9.1)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ *  @generated
+ */
+package org.apache.accumulo.core.tabletserver.thrift;
+
+import org.apache.thrift.scheme.IScheme;
+import org.apache.thrift.scheme.SchemeFactory;
+import org.apache.thrift.scheme.StandardScheme;
+
+import org.apache.thrift.scheme.TupleScheme;
+import org.apache.thrift.protocol.TTupleProtocol;
+import org.apache.thrift.protocol.TProtocolException;
+import org.apache.thrift.EncodingUtils;
+import org.apache.thrift.TException;
+import org.apache.thrift.async.AsyncMethodCallback;
+import org.apache.thrift.server.AbstractNonblockingServer.*;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.EnumMap;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.EnumSet;
+import java.util.Collections;
+import java.util.BitSet;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@SuppressWarnings({"unchecked", "serial", "rawtypes", "unused"}) public class TSamplerConfiguration implements org.apache.thrift.TBase<TSamplerConfiguration, TSamplerConfiguration._Fields>, java.io.Serializable, Cloneable, Comparable<TSamplerConfiguration> {
+  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TSamplerConfiguration");
+
+  private static final org.apache.thrift.protocol.TField CLASS_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("className", org.apache.thrift.protocol.TType.STRING, (short)1);
+  private static final org.apache.thrift.protocol.TField OPTIONS_FIELD_DESC = new org.apache.thrift.protocol.TField("options", org.apache.thrift.protocol.TType.MAP, (short)2);
+
+  private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
+  static {
+    schemes.put(StandardScheme.class, new TSamplerConfigurationStandardSchemeFactory());
+    schemes.put(TupleScheme.class, new TSamplerConfigurationTupleSchemeFactory());
+  }
+
+  public String className; // required
+  public Map<String,String> options; // required
+
+  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
+  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
+    CLASS_NAME((short)1, "className"),
+    OPTIONS((short)2, "options");
+
+    private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
+
+    static {
+      for (_Fields field : EnumSet.allOf(_Fields.class)) {
+        byName.put(field.getFieldName(), field);
+      }
+    }
+
+    /**
+     * Find the _Fields constant that matches fieldId, or null if its not found.
+     */
+    public static _Fields findByThriftId(int fieldId) {
+      switch(fieldId) {
+        case 1: // CLASS_NAME
+          return CLASS_NAME;
+        case 2: // OPTIONS
+          return OPTIONS;
+        default:
+          return null;
+      }
+    }
+
+    /**
+     * Find the _Fields constant that matches fieldId, throwing an exception
+     * if it is not found.
+     */
+    public static _Fields findByThriftIdOrThrow(int fieldId) {
+      _Fields fields = findByThriftId(fieldId);
+      if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!");
+      return fields;
+    }
+
+    /**
+     * Find the _Fields constant that matches name, or null if its not found.
+     */
+    public static _Fields findByName(String name) {
+      return byName.get(name);
+    }
+
+    private final short _thriftId;
+    private final String _fieldName;
+
+    _Fields(short thriftId, String fieldName) {
+      _thriftId = thriftId;
+      _fieldName = fieldName;
+    }
+
+    public short getThriftFieldId() {
+      return _thriftId;
+    }
+
+    public String getFieldName() {
+      return _fieldName;
+    }
+  }
+
+  // isset id assignments
+  public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
+  static {
+    Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
+    tmpMap.put(_Fields.CLASS_NAME, new org.apache.thrift.meta_data.FieldMetaData("className", org.apache.thrift.TFieldRequirementType.DEFAULT, 
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
+    tmpMap.put(_Fields.OPTIONS, new org.apache.thrift.meta_data.FieldMetaData("options", org.apache.thrift.TFieldRequirementType.DEFAULT, 
+        new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, 
+            new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), 
+            new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))));
+    metaDataMap = Collections.unmodifiableMap(tmpMap);
+    org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TSamplerConfiguration.class, metaDataMap);
+  }
+
+  public TSamplerConfiguration() {
+  }
+
+  public TSamplerConfiguration(
+    String className,
+    Map<String,String> options)
+  {
+    this();
+    this.className = className;
+    this.options = options;
+  }
+
+  /**
+   * Performs a deep copy on <i>other</i>.
+   */
+  public TSamplerConfiguration(TSamplerConfiguration other) {
+    if (other.isSetClassName()) {
+      this.className = other.className;
+    }
+    if (other.isSetOptions()) {
+      Map<String,String> __this__options = new HashMap<String,String>(other.options);
+      this.options = __this__options;
+    }
+  }
+
+  public TSamplerConfiguration deepCopy() {
+    return new TSamplerConfiguration(this);
+  }
+
+  @Override
+  public void clear() {
+    this.className = null;
+    this.options = null;
+  }
+
+  public String getClassName() {
+    return this.className;
+  }
+
+  public TSamplerConfiguration setClassName(String className) {
+    this.className = className;
+    return this;
+  }
+
+  public void unsetClassName() {
+    this.className = null;
+  }
+
+  /** Returns true if field className is set (has been assigned a value) and false otherwise */
+  public boolean isSetClassName() {
+    return this.className != null;
+  }
+
+  public void setClassNameIsSet(boolean value) {
+    if (!value) {
+      this.className = null;
+    }
+  }
+
+  public int getOptionsSize() {
+    return (this.options == null) ? 0 : this.options.size();
+  }
+
+  public void putToOptions(String key, String val) {
+    if (this.options == null) {
+      this.options = new HashMap<String,String>();
+    }
+    this.options.put(key, val);
+  }
+
+  public Map<String,String> getOptions() {
+    return this.options;
+  }
+
+  public TSamplerConfiguration setOptions(Map<String,String> options) {
+    this.options = options;
+    return this;
+  }
+
+  public void unsetOptions() {
+    this.options = null;
+  }
+
+  /** Returns true if field options is set (has been assigned a value) and false otherwise */
+  public boolean isSetOptions() {
+    return this.options != null;
+  }
+
+  public void setOptionsIsSet(boolean value) {
+    if (!value) {
+      this.options = null;
+    }
+  }
+
+  public void setFieldValue(_Fields field, Object value) {
+    switch (field) {
+    case CLASS_NAME:
+      if (value == null) {
+        unsetClassName();
+      } else {
+        setClassName((String)value);
+      }
+      break;
+
+    case OPTIONS:
+      if (value == null) {
+        unsetOptions();
+      } else {
+        setOptions((Map<String,String>)value);
+      }
+      break;
+
+    }
+  }
+
+  public Object getFieldValue(_Fields field) {
+    switch (field) {
+    case CLASS_NAME:
+      return getClassName();
+
+    case OPTIONS:
+      return getOptions();
+
+    }
+    throw new IllegalStateException();
+  }
+
+  /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */
+  public boolean isSet(_Fields field) {
+    if (field == null) {
+      throw new IllegalArgumentException();
+    }
+
+    switch (field) {
+    case CLASS_NAME:
+      return isSetClassName();
+    case OPTIONS:
+      return isSetOptions();
+    }
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public boolean equals(Object that) {
+    if (that == null)
+      return false;
+    if (that instanceof TSamplerConfiguration)
+      return this.equals((TSamplerConfiguration)that);
+    return false;
+  }
+
+  public boolean equals(TSamplerConfiguration that) {
+    if (that == null)
+      return false;
+
+    boolean this_present_className = true && this.isSetClassName();
+    boolean that_present_className = true && that.isSetClassName();
+    if (this_present_className || that_present_className) {
+      if (!(this_present_className && that_present_className))
+        return false;
+      if (!this.className.equals(that.className))
+        return false;
+    }
+
+    boolean this_present_options = true && this.isSetOptions();
+    boolean that_present_options = true && that.isSetOptions();
+    if (this_present_options || that_present_options) {
+      if (!(this_present_options && that_present_options))
+        return false;
+      if (!this.options.equals(that.options))
+        return false;
+    }
+
+    return true;
+  }
+
+  @Override
+  public int hashCode() {
+    return 0;
+  }
+
+  @Override
+  public int compareTo(TSamplerConfiguration other) {
+    if (!getClass().equals(other.getClass())) {
+      return getClass().getName().compareTo(other.getClass().getName());
+    }
+
+    int lastComparison = 0;
+
+    lastComparison = Boolean.valueOf(isSetClassName()).compareTo(other.isSetClassName());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetClassName()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.className, other.className);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
+    lastComparison = Boolean.valueOf(isSetOptions()).compareTo(other.isSetOptions());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetOptions()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.options, other.options);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
+    return 0;
+  }
+
+  public _Fields fieldForId(int fieldId) {
+    return _Fields.findByThriftId(fieldId);
+  }
+
+  public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException {
+    schemes.get(iprot.getScheme()).getScheme().read(iprot, this);
+  }
+
+  public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException {
+    schemes.get(oprot.getScheme()).getScheme().write(oprot, this);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder("TSamplerConfiguration(");
+    boolean first = true;
+
+    sb.append("className:");
+    if (this.className == null) {
+      sb.append("null");
+    } else {
+      sb.append(this.className);
+    }
+    first = false;
+    if (!first) sb.append(", ");
+    sb.append("options:");
+    if (this.options == null) {
+      sb.append("null");
+    } else {
+      sb.append(this.options);
+    }
+    first = false;
+    sb.append(")");
+    return sb.toString();
+  }
+
+  public void validate() throws org.apache.thrift.TException {
+    // check for required fields
+    // check for sub-struct validity
+  }
+
+  private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException {
+    try {
+      write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out)));
+    } catch (org.apache.thrift.TException te) {
+      throw new java.io.IOException(te);
+    }
+  }
+
+  private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
+    try {
+      read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in)));
+    } catch (org.apache.thrift.TException te) {
+      throw new java.io.IOException(te);
+    }
+  }
+
+  private static class TSamplerConfigurationStandardSchemeFactory implements SchemeFactory {
+    public TSamplerConfigurationStandardScheme getScheme() {
+      return new TSamplerConfigurationStandardScheme();
+    }
+  }
+
+  private static class TSamplerConfigurationStandardScheme extends StandardScheme<TSamplerConfiguration> {
+
+    public void read(org.apache.thrift.protocol.TProtocol iprot, TSamplerConfiguration struct) throws org.apache.thrift.TException {
+      org.apache.thrift.protocol.TField schemeField;
+      iprot.readStructBegin();
+      while (true)
+      {
+        schemeField = iprot.readFieldBegin();
+        if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { 
+          break;
+        }
+        switch (schemeField.id) {
+          case 1: // CLASS_NAME
+            if (schemeField.type == org.apache.thrift.protocol.TType.STRING) {
+              struct.className = iprot.readString();
+              struct.setClassNameIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
+          case 2: // OPTIONS
+            if (schemeField.type == org.apache.thrift.protocol.TType.MAP) {
+              {
+                org.apache.thrift.protocol.TMap _map106 = iprot.readMapBegin();
+                struct.options = new HashMap<String,String>(2*_map106.size);
+                for (int _i107 = 0; _i107 < _map106.size; ++_i107)
+                {
+                  String _key108;
+                  String _val109;
+                  _key108 = iprot.readString();
+                  _val109 = iprot.readString();
+                  struct.options.put(_key108, _val109);
+                }
+                iprot.readMapEnd();
+              }
+              struct.setOptionsIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
+          default:
+            org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+        }
+        iprot.readFieldEnd();
+      }
+      iprot.readStructEnd();
+
+      // check for required fields of primitive type, which can't be checked in the validate method
+      struct.validate();
+    }
+
+    public void write(org.apache.thrift.protocol.TProtocol oprot, TSamplerConfiguration struct) throws org.apache.thrift.TException {
+      struct.validate();
+
+      oprot.writeStructBegin(STRUCT_DESC);
+      if (struct.className != null) {
+        oprot.writeFieldBegin(CLASS_NAME_FIELD_DESC);
+        oprot.writeString(struct.className);
+        oprot.writeFieldEnd();
+      }
+      if (struct.options != null) {
+        oprot.writeFieldBegin(OPTIONS_FIELD_DESC);
+        {
+          oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.options.size()));
+          for (Map.Entry<String, String> _iter110 : struct.options.entrySet())
+          {
+            oprot.writeString(_iter110.getKey());
+            oprot.writeString(_iter110.getValue());
+          }
+          oprot.writeMapEnd();
+        }
+        oprot.writeFieldEnd();
+      }
+      oprot.writeFieldStop();
+      oprot.writeStructEnd();
+    }
+
+  }
+
+  private static class TSamplerConfigurationTupleSchemeFactory implements SchemeFactory {
+    public TSamplerConfigurationTupleScheme getScheme() {
+      return new TSamplerConfigurationTupleScheme();
+    }
+  }
+
+  private static class TSamplerConfigurationTupleScheme extends TupleScheme<TSamplerConfiguration> {
+
+    @Override
+    public void write(org.apache.thrift.protocol.TProtocol prot, TSamplerConfiguration struct) throws org.apache.thrift.TException {
+      TTupleProtocol oprot = (TTupleProtocol) prot;
+      BitSet optionals = new BitSet();
+      if (struct.isSetClassName()) {
+        optionals.set(0);
+      }
+      if (struct.isSetOptions()) {
+        optionals.set(1);
+      }
+      oprot.writeBitSet(optionals, 2);
+      if (struct.isSetClassName()) {
+        oprot.writeString(struct.className);
+      }
+      if (struct.isSetOptions()) {
+        {
+          oprot.writeI32(struct.options.size());
+          for (Map.Entry<String, String> _iter111 : struct.options.entrySet())
+          {
+            oprot.writeString(_iter111.getKey());
+            oprot.writeString(_iter111.getValue());
+          }
+        }
+      }
+    }
+
+    @Override
+    public void read(org.apache.thrift.protocol.TProtocol prot, TSamplerConfiguration struct) throws org.apache.thrift.TException {
+      TTupleProtocol iprot = (TTupleProtocol) prot;
+      BitSet incoming = iprot.readBitSet(2);
+      if (incoming.get(0)) {
+        struct.className = iprot.readString();
+        struct.setClassNameIsSet(true);
+      }
+      if (incoming.get(1)) {
+        {
+          org.apache.thrift.protocol.TMap _map112 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32());
+          struct.options = new HashMap<String,String>(2*_map112.size);
+          for (int _i113 = 0; _i113 < _map112.size; ++_i113)
+          {
+            String _key114;
+            String _val115;
+            _key114 = iprot.readString();
+            _val115 = iprot.readString();
+            struct.options.put(_key114, _val115);
+          }
+        }
+        struct.setOptionsIsSet(true);
+      }
+    }
+  }
+
+}
+


[6/7] accumulo git commit: ACCUMULO-3913 Added per table sampling

Posted by kt...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/compaction/NullType.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/compaction/NullType.java b/core/src/main/java/org/apache/accumulo/core/compaction/NullType.java
new file mode 100644
index 0000000..fb4c452
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/compaction/NullType.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.compaction;
+
+import com.google.common.base.Preconditions;
+
+public class NullType implements Type {
+  @Override
+  public String convert(String str) {
+    Preconditions.checkArgument(str == null);
+    return "";
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/conf/Property.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/conf/Property.java b/core/src/main/java/org/apache/accumulo/core/conf/Property.java
index 5bd5c8a..400577c 100644
--- a/core/src/main/java/org/apache/accumulo/core/conf/Property.java
+++ b/core/src/main/java/org/apache/accumulo/core/conf/Property.java
@@ -514,6 +514,16 @@ public enum Property {
   @Experimental
   TABLE_VOLUME_CHOOSER("table.volume.chooser", "org.apache.accumulo.server.fs.RandomVolumeChooser", PropertyType.CLASSNAME,
       "The class that will be used to select which volume will be used to create new files for this table."),
+  TABLE_SAMPLER(
+      "table.sampler",
+      "",
+      PropertyType.CLASSNAME,
+      "The name of a class that implements org.apache.accumulo.core.Sampler.  Setting this option enables storing a sample of data which can be scanned."
+          + "  Always having a current sample can useful for query optimization and data comprehension.   After enabling sampling for an existing table, a compaction "
+          + "is needed to compute the sample for existing data.  The compact command in the shell has an option to only compact files without sample data."),
+  TABLE_SAMPLER_OPTS("table.sampler.opt.", null, PropertyType.PREFIX,
+      "The property is used to set options for a sampler.  If a sample had two options like hasher and modulous, then the two properties "
+          + "table.sampler.opt.hasher=${hash algorithm} and table.sampler.opt.modulous=${mod} would be set."),
 
   // VFS ClassLoader properties
   VFS_CLASSLOADER_SYSTEM_CLASSPATH_PROPERTY(AccumuloVFSClassLoader.VFS_CLASSLOADER_SYSTEM_CLASSPATH_PROPERTY, "", PropertyType.STRING,
@@ -776,7 +786,7 @@ public enum Property {
     return validTableProperties.contains(key) || key.startsWith(Property.TABLE_CONSTRAINT_PREFIX.getKey())
         || key.startsWith(Property.TABLE_ITERATOR_PREFIX.getKey()) || key.startsWith(Property.TABLE_LOCALITY_GROUP_PREFIX.getKey())
         || key.startsWith(Property.TABLE_COMPACTION_STRATEGY_PREFIX.getKey()) || key.startsWith(Property.TABLE_REPLICATION_TARGET.getKey())
-        || key.startsWith(Property.TABLE_ARBITRARY_PROP_PREFIX.getKey());
+        || key.startsWith(Property.TABLE_ARBITRARY_PROP_PREFIX.getKey()) || key.startsWith(TABLE_SAMPLER_OPTS.getKey());
   }
 
   private static final EnumSet<Property> fixedProperties = EnumSet.of(Property.TSERV_CLIENTPORT, Property.TSERV_NATIVEMAP_ENABLED,
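
A minimal sketch of enabling sampling through these properties with the pre-existing TableOperations.setProperty call (the Connector conn and table name are illustrative, and the RowSampler class name is assumed; the option keys follow the TABLE_SAMPLER_OPTS prefix above):

    // Enable a row-based sampler on an existing table via table properties.
    conn.tableOperations().setProperty("mytable", "table.sampler",
        "org.apache.accumulo.core.client.sample.RowSampler");
    conn.tableOperations().setProperty("mytable", "table.sampler.opt.hasher", "murmur3_32");
    conn.tableOperations().setProperty("mytable", "table.sampler.opt.modulus", "7");
    // Per the TABLE_SAMPLER description, a compaction is then needed before
    // existing files have sample data; the shell's compact command can select
    // just the files that lack it.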

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/file/BloomFilterLayer.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/BloomFilterLayer.java b/core/src/main/java/org/apache/accumulo/core/file/BloomFilterLayer.java
index a5bea83..758df12 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/BloomFilterLayer.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/BloomFilterLayer.java
@@ -50,6 +50,7 @@ import org.apache.accumulo.core.file.keyfunctor.KeyFunctor;
 import org.apache.accumulo.core.file.rfile.RFile;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.util.CachedConfiguration;
 import org.apache.accumulo.core.util.NamingThreadFactory;
 import org.apache.accumulo.fate.util.LoggingRunnable;
@@ -424,6 +425,11 @@ public class BloomFilterLayer {
       reader.setInterruptFlag(flag);
     }
 
+    @Override
+    public FileSKVIterator getSample(SamplerConfigurationImpl sampleConfig) {
+      return new BloomFilterLayer.Reader(reader.getSample(sampleConfig), bfl);
+    }
+
   }
 
   public static void main(String[] args) throws IOException {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/file/FileSKVIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/FileSKVIterator.java b/core/src/main/java/org/apache/accumulo/core/file/FileSKVIterator.java
index 60970e2..3713453 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/FileSKVIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/FileSKVIterator.java
@@ -21,14 +21,17 @@ import java.io.IOException;
 
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 
-public interface FileSKVIterator extends InterruptibleIterator {
+public interface FileSKVIterator extends InterruptibleIterator, AutoCloseable {
   Key getFirstKey() throws IOException;
 
   Key getLastKey() throws IOException;
 
   DataInputStream getMetaStore(String name) throws IOException, NoSuchMetaStoreException;
 
+  FileSKVIterator getSample(SamplerConfigurationImpl sampleConfig);
+
   void closeDeepCopies() throws IOException;
 
   void close() throws IOException;
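
Because FileSKVIterator now extends AutoCloseable, a returned sample can be managed with try-with-resources. A sketch using only methods of this interface plus classes already used in this commit (Range, LocalityGroupUtil); the null check mirrors how PrintInfo, further below, treats files without sample data:

    // Dump the sample view of a file, if one was stored for this config.
    static void dumpSample(FileSKVIterator reader, SamplerConfigurationImpl sampleConfig) throws IOException {
      try (FileSKVIterator sample = reader.getSample(sampleConfig)) {
        if (sample == null) {
          System.out.println("no sample data stored for this configuration");
          return;
        }
        sample.seek(new Range(), LocalityGroupUtil.EMPTY_CF_SET, false);
        while (sample.hasTop()) {
          System.out.println(sample.getTopKey() + " -> " + sample.getTopValue());
          sample.next();
        }
      }
    }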

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/file/map/MapFileOperations.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/map/MapFileOperations.java b/core/src/main/java/org/apache/accumulo/core/file/map/MapFileOperations.java
index fb2762f..75cfa7e 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/map/MapFileOperations.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/map/MapFileOperations.java
@@ -37,6 +37,7 @@ import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.system.MapFileIterator;
 import org.apache.accumulo.core.iterators.system.SequenceFileIterator;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -132,6 +133,11 @@ public class MapFileOperations extends FileOperations {
     public void setInterruptFlag(AtomicBoolean flag) {
       ((FileSKVIterator) reader).setInterruptFlag(flag);
     }
+
+    @Override
+    public FileSKVIterator getSample(SamplerConfigurationImpl sampleConfig) {
+      return ((FileSKVIterator) reader).getSample(sampleConfig);
+    }
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiIndexIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiIndexIterator.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiIndexIterator.java
index f220a58..01af184 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiIndexIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiIndexIterator.java
@@ -33,6 +33,7 @@ import org.apache.accumulo.core.file.rfile.MultiLevelIndex.IndexEntry;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.system.HeapIterator;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 
 class MultiIndexIterator extends HeapIterator implements FileSKVIterator {
 
@@ -93,4 +94,9 @@ class MultiIndexIterator extends HeapIterator implements FileSKVIterator {
     throw new UnsupportedOperationException();
   }
 
+  @Override
+  public FileSKVIterator getSample(SamplerConfigurationImpl sampleConfig) {
+    throw new UnsupportedOperationException();
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
index 2109478..1a383e4 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/MultiLevelIndex.java
@@ -265,7 +265,7 @@ public class MultiLevelIndex {
 
     public void readFields(DataInput in, int version) throws IOException {
 
-      if (version == RFile.RINDEX_VER_6 || version == RFile.RINDEX_VER_7) {
+      if (version == RFile.RINDEX_VER_6 || version == RFile.RINDEX_VER_7 || version == RFile.RINDEX_VER_8) {
         level = in.readInt();
         offset = in.readInt();
         hasNext = in.readBoolean();
@@ -736,7 +736,7 @@ public class MultiLevelIndex {
 
       size = 0;
 
-      if (version == RFile.RINDEX_VER_6 || version == RFile.RINDEX_VER_7) {
+      if (version == RFile.RINDEX_VER_6 || version == RFile.RINDEX_VER_7 || version == RFile.RINDEX_VER_8) {
         size = in.readInt();
       }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java
index 5a3e911..4631a4d 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java
@@ -28,6 +28,7 @@ import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.file.FileSKVIterator;
 import org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile;
 import org.apache.accumulo.core.file.rfile.RFile.Reader;
 import org.apache.accumulo.start.spi.KeywordExecutable;
@@ -54,6 +55,8 @@ public class PrintInfo implements KeywordExecutable {
     boolean hash = false;
     @Parameter(names = {"--histogram"}, description = "print a histogram of the key-value sizes")
     boolean histogram = false;
+    @Parameter(names = {"--useSample"}, description = "Use sample data for --dump, --vis, --histogram options")
+    boolean useSample = false;
     @Parameter(description = " <file> { <file> ... }")
     List<String> files = new ArrayList<String>();
     @Parameter(names = {"-c", "--config"}, variableArity = true, description = "Comma-separated Hadoop configuration files")
@@ -119,14 +122,27 @@ public class PrintInfo implements KeywordExecutable {
       if (opts.histogram || opts.dump || opts.vis || opts.hash) {
         localityGroupCF = iter.getLocalityGroupCF();
 
+        FileSKVIterator dataIter = iter;
+        if (opts.useSample) {
+          dataIter = iter.getSample();
+
+          if (dataIter == null) {
+            System.out.println("ERROR : This rfile has no sample data");
+            return;
+          }
+        }
+
         for (Entry<String,ArrayList<ByteSequence>> cf : localityGroupCF.entrySet()) {
 
-          iter.seek(new Range((Key) null, (Key) null), cf.getValue(), true);
-          while (iter.hasTop()) {
-            Key key = iter.getTopKey();
-            Value value = iter.getTopValue();
-            if (opts.dump)
+          dataIter.seek(new Range((Key) null, (Key) null), cf.getValue(), true);
+          while (dataIter.hasTop()) {
+            Key key = dataIter.getTopKey();
+            Value value = dataIter.getTopValue();
+            if (opts.dump) {
               System.out.println(key + " -> " + value);
+              if (System.out.checkError())
+                return;
+            }
             if (opts.histogram) {
               long size = key.getSize() + value.getSize();
               int bucket = (int) Math.log10(size);
@@ -134,7 +150,7 @@ public class PrintInfo implements KeywordExecutable {
               sizeBuckets[bucket] += size;
               totalSize += size;
             }
-            iter.next();
+            dataIter.next();
           }
         }
       }

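With the new flag, the sample data stored in an rfile can be inspected directly. A hedged usage sketch (assuming PrintInfo is reached through the usual rfile-info entry point; the file path is illustrative):

  accumulo rfile-info --useSample --dump /accumulo/tables/1/t-0001/F0000abc.rf

If the file carries no sample data, the command prints an error and returns rather than silently falling back to the full data.
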
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
index 54b01b4..9564f0b 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
@@ -36,6 +36,8 @@ import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.accumulo.core.client.SampleNotPresentException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.ArrayByteSequence;
@@ -62,12 +64,17 @@ import org.apache.accumulo.core.iterators.system.HeapIterator;
 import org.apache.accumulo.core.iterators.system.InterruptibleIterator;
 import org.apache.accumulo.core.iterators.system.LocalityGroupIterator;
 import org.apache.accumulo.core.iterators.system.LocalityGroupIterator.LocalityGroup;
+import org.apache.accumulo.core.sample.Sampler;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.util.MutableByteSequence;
 import org.apache.commons.lang.mutable.MutableLong;
 import org.apache.hadoop.io.Writable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+
 public class RFile {
 
   public static final String EXTENSION = "rf";
@@ -77,15 +84,38 @@ public class RFile {
   private RFile() {}
 
   private static final int RINDEX_MAGIC = 0x20637474;
-  static final int RINDEX_VER_7 = 7;
-  static final int RINDEX_VER_6 = 6;
+
+  static final int RINDEX_VER_8 = 8; // Added sample storage. There is a sample locality group for each locality group. Samples are built using a Sampler and
+                                     // sampler configuration. The Sampler and its configuration are stored in RFile. Persisting the method of producing the
+                                     // sample allows a user of RFile to determine if the sample is useful.
+  static final int RINDEX_VER_7 = 7; // Added support for prefix encoding and encryption. Before this change only exact matches within a key field were deduped
+                                     // for consecutive keys. After this change, if consecutive key fields have the same prefix then the prefix is only stored
+                                     // once.
+  static final int RINDEX_VER_6 = 6; // Added support for multilevel indexes. Before this the index was one list with an entry for each data block. For large
+                                     // files, a large index needed to be read into memory before any seek could be done. After this change the index is a fat
+                                     // tree, and opening a large rfile is much faster. Like the previous version of Rfile, each index node in the tree is kept
+                                     // tree, and opening a large RFile is much faster. Like the previous version of RFile, each index node in the tree is kept
   // static final int RINDEX_VER_5 = 5; // unreleased
-  static final int RINDEX_VER_4 = 4;
-  static final int RINDEX_VER_3 = 3;
+  static final int RINDEX_VER_4 = 4; // Added support for seeking using serialized indexes. After this change the index is no longer deserialized when the
+                                     // rfile is opened. The entire serialized index is read into memory as a single byte array. For seeks, the serialized
+                                     // index is used to find blocks (the binary search deserializes only the specific entries it needs). This resulted in less
+                                     // memory usage (no object overhead) and faster open times for RFiles.
+  static final int RINDEX_VER_3 = 3; // Initial released version of RFile. R is for relative encoding. A key is encoded relative to the previous key. The
+                                     // initial version deduped key fields that were the same for consecutive keys. For sorted data this is a common occurrence.
+                                     // This version supports locality groups. Each locality group has an index pointing to a set of data blocks. Each data block
+                                     // contains relatively encoded keys and values.
+
+  // Buffer sample data so that many sample data blocks are stored contiguously.
+  private static int sampleBufferSize = 10000000;
+
+  @VisibleForTesting
+  public static void setSampleBufferSize(int bufferSize) {
+    sampleBufferSize = bufferSize;
+  }
 
   private static class LocalityGroupMetadata implements Writable {
 
-    private int startBlock;
+    private int startBlock = -1;
     private Key firstKey;
     private Map<ByteSequence,MutableLong> columnFamilies;
 
@@ -95,14 +125,15 @@ public class RFile {
 
     private MultiLevelIndex.BufferedWriter indexWriter;
     private MultiLevelIndex.Reader indexReader;
+    private int version;
 
     public LocalityGroupMetadata(int version, BlockFileReader br) {
       columnFamilies = new HashMap<ByteSequence,MutableLong>();
       indexReader = new MultiLevelIndex.Reader(br, version);
+      this.version = version;
     }
 
-    public LocalityGroupMetadata(int nextBlock, Set<ByteSequence> pcf, int indexBlockSize, BlockFileWriter bfw) {
-      this.startBlock = nextBlock;
+    public LocalityGroupMetadata(Set<ByteSequence> pcf, int indexBlockSize, BlockFileWriter bfw) {
       isDefaultLG = true;
       columnFamilies = new HashMap<ByteSequence,MutableLong>();
       previousColumnFamilies = pcf;
@@ -110,8 +141,7 @@ public class RFile {
       indexWriter = new MultiLevelIndex.BufferedWriter(new MultiLevelIndex.Writer(bfw, indexBlockSize));
     }
 
-    public LocalityGroupMetadata(String name, Set<ByteSequence> cfset, int nextBlock, int indexBlockSize, BlockFileWriter bfw) {
-      this.startBlock = nextBlock;
+    public LocalityGroupMetadata(String name, Set<ByteSequence> cfset, int indexBlockSize, BlockFileWriter bfw) {
       this.name = name;
       isDefaultLG = false;
       columnFamilies = new HashMap<ByteSequence,MutableLong>();
@@ -181,7 +211,9 @@ public class RFile {
         name = in.readUTF();
       }
 
-      startBlock = in.readInt();
+      if (version == RINDEX_VER_3 || version == RINDEX_VER_4 || version == RINDEX_VER_6 || version == RINDEX_VER_7) {
+        startBlock = in.readInt();
+      }
 
       int size = in.readInt();
 
@@ -224,8 +256,6 @@ public class RFile {
         out.writeUTF(name);
       }
 
-      out.writeInt(startBlock);
-
       if (isDefaultLG && columnFamilies == null) {
         // only expect null when default LG, otherwise let a NPE occur
         out.writeInt(-1);
@@ -246,26 +276,27 @@ public class RFile {
       indexWriter.close(out);
     }
 
-    public void printInfo() throws IOException {
+    public void printInfo(boolean isSample) throws IOException {
       PrintStream out = System.out;
-      out.println("Locality group         : " + (isDefaultLG ? "<DEFAULT>" : name));
-      out.println("\tStart block          : " + startBlock);
-      out.println("\tNum   blocks         : " + String.format("%,d", indexReader.size()));
+      out.printf("%-24s : %s\n", (isSample ? "Sample " : "") + "Locality group ", (isDefaultLG ? "<DEFAULT>" : name));
+      if (version == RINDEX_VER_3 || version == RINDEX_VER_4 || version == RINDEX_VER_6 || version == RINDEX_VER_7) {
+        out.printf("\t%-22s : %d\n", "Start block", startBlock);
+      }
+      out.printf("\t%-22s : %,d\n", "Num   blocks", indexReader.size());
       TreeMap<Integer,Long> sizesByLevel = new TreeMap<Integer,Long>();
       TreeMap<Integer,Long> countsByLevel = new TreeMap<Integer,Long>();
       indexReader.getIndexInfo(sizesByLevel, countsByLevel);
       for (Entry<Integer,Long> entry : sizesByLevel.descendingMap().entrySet()) {
-        out.println("\tIndex level " + entry.getKey() + "        : "
-            + String.format("%,d bytes  %,d blocks", entry.getValue(), countsByLevel.get(entry.getKey())));
+        out.printf("\t%-22s : %,d bytes  %,d blocks\n", "Index level " + entry.getKey(), entry.getValue(), countsByLevel.get(entry.getKey()));
       }
-      out.println("\tFirst key            : " + firstKey);
+      out.printf("\t%-22s : %s\n", "First key", firstKey);
 
       Key lastKey = null;
       if (indexReader.size() > 0) {
         lastKey = indexReader.getLastKey();
       }
 
-      out.println("\tLast key             : " + lastKey);
+      out.printf("\t%-22s : %s\n", "Last key", lastKey);
 
       long numKeys = 0;
       IndexIterator countIter = indexReader.lookup(new Key());
@@ -273,48 +304,193 @@ public class RFile {
         numKeys += countIter.next().getNumEntries();
       }
 
-      out.println("\tNum entries          : " + String.format("%,d", numKeys));
-      out.println("\tColumn families      : " + (isDefaultLG && columnFamilies == null ? "<UNKNOWN>" : columnFamilies.keySet()));
+      out.printf("\t%-22s : %,d\n", "Num entries", numKeys);
+      out.printf("\t%-22s : %s\n", "Column families", (isDefaultLG && columnFamilies == null ? "<UNKNOWN>" : columnFamilies.keySet()));
     }
 
   }
 
-  public static class Writer implements FileSKVWriter {
+  private static class SampleEntry {
+    Key key;
+    Value val;
 
-    public static final int MAX_CF_IN_DLG = 1000;
+    SampleEntry(Key key, Value val) {
+      this.key = new Key(key);
+      this.val = new Value(val);
+    }
+  }
+
+  private static class SampleLocalityGroupWriter {
+
+    private Sampler sampler;
+
+    private List<SampleEntry> entries = new ArrayList<>();
+    private long dataSize = 0;
+
+    private LocalityGroupWriter lgr;
+
+    public SampleLocalityGroupWriter(LocalityGroupWriter lgr, Sampler sampler) {
+      this.lgr = lgr;
+      this.sampler = sampler;
+    }
+
+    public void append(Key key, Value value) throws IOException {
+      if (sampler.accept(key)) {
+        entries.add(new SampleEntry(key, value));
+        dataSize += key.getSize() + value.getSize();
+      }
+    }
+
+    public void close() throws IOException {
+      for (SampleEntry se : entries) {
+        lgr.append(se.key, se.val);
+      }
+
+      lgr.close();
+    }
+
+    public void flushIfNeeded() throws IOException {
+      if (dataSize > sampleBufferSize) {
+        // write out all but the last key so that closeBlock() can always eventually be called with lastBlock == true
+        List<SampleEntry> subList = entries.subList(0, entries.size() - 1);
+
+        if (subList.size() > 0) {
+          for (SampleEntry se : subList) {
+            lgr.append(se.key, se.val);
+          }
+
+          lgr.closeBlock(subList.get(subList.size() - 1).key, false);
+
+          subList.clear();
+          dataSize = 0;
+        }
+      }
+    }
+  }
+
+  private static class LocalityGroupWriter {
 
     private BlockFileWriter fileWriter;
     private ABlockWriter blockWriter;
 
     // private BlockAppender blockAppender;
     private long blockSize = 100000;
-    private int indexBlockSize;
     private int entries = 0;
 
-    private ArrayList<LocalityGroupMetadata> localityGroups = new ArrayList<LocalityGroupMetadata>();
     private LocalityGroupMetadata currentLocalityGroup = null;
-    private int nextBlock = 0;
 
     private Key lastKeyInBlock = null;
 
+    private Key prevKey = new Key();
+
+    private SampleLocalityGroupWriter sample;
+
+    LocalityGroupWriter(BlockFileWriter fileWriter, long blockSize, LocalityGroupMetadata currentLocalityGroup, SampleLocalityGroupWriter sample) {
+      this.fileWriter = fileWriter;
+      this.blockSize = blockSize;
+      this.currentLocalityGroup = currentLocalityGroup;
+      this.sample = sample;
+    }
+
+    public void append(Key key, Value value) throws IOException {
+
+      if (key.compareTo(prevKey) < 0) {
+        throw new IllegalStateException("Keys appended out-of-order.  New key " + key + ", previous key " + prevKey);
+      }
+
+      currentLocalityGroup.updateColumnCount(key);
+
+      if (currentLocalityGroup.getFirstKey() == null) {
+        currentLocalityGroup.setFirstKey(key);
+      }
+
+      if (sample != null) {
+        sample.append(key, value);
+      }
+
+      if (blockWriter == null) {
+        blockWriter = fileWriter.prepareDataBlock();
+      } else if (blockWriter.getRawSize() > blockSize) {
+        closeBlock(prevKey, false);
+        blockWriter = fileWriter.prepareDataBlock();
+      }
+
+      RelativeKey rk = new RelativeKey(lastKeyInBlock, key);
+
+      rk.write(blockWriter);
+      value.write(blockWriter);
+      entries++;
+
+      prevKey = new Key(key);
+      lastKeyInBlock = prevKey;
+
+    }
+
+    private void closeBlock(Key key, boolean lastBlock) throws IOException {
+      blockWriter.close();
+
+      if (lastBlock)
+        currentLocalityGroup.indexWriter.addLast(key, entries, blockWriter.getStartPos(), blockWriter.getCompressedSize(), blockWriter.getRawSize());
+      else
+        currentLocalityGroup.indexWriter.add(key, entries, blockWriter.getStartPos(), blockWriter.getCompressedSize(), blockWriter.getRawSize());
+
+      if (sample != null)
+        sample.flushIfNeeded();
+
+      blockWriter = null;
+      lastKeyInBlock = null;
+      entries = 0;
+    }
+
+    public void close() throws IOException {
+      if (blockWriter != null) {
+        closeBlock(lastKeyInBlock, true);
+      }
+
+      if (sample != null) {
+        sample.close();
+      }
+    }
+  }
+
+  public static class Writer implements FileSKVWriter {
+
+    public static final int MAX_CF_IN_DLG = 1000;
+
+    private BlockFileWriter fileWriter;
+
+    // private BlockAppender blockAppender;
+    private long blockSize = 100000;
+    private int indexBlockSize;
+
+    private ArrayList<LocalityGroupMetadata> localityGroups = new ArrayList<LocalityGroupMetadata>();
+    private ArrayList<LocalityGroupMetadata> sampleGroups = new ArrayList<LocalityGroupMetadata>();
+    private LocalityGroupMetadata currentLocalityGroup = null;
+    private LocalityGroupMetadata sampleLocalityGroup = null;
+
     private boolean dataClosed = false;
     private boolean closed = false;
-    private Key prevKey = new Key();
     private boolean startedDefaultLocalityGroup = false;
 
     private HashSet<ByteSequence> previousColumnFamilies;
     private long length = -1;
 
+    private LocalityGroupWriter lgWriter;
+
+    private SamplerConfigurationImpl samplerConfig;
+    private Sampler sampler;
+
     public Writer(BlockFileWriter bfw, int blockSize) throws IOException {
-      this(bfw, blockSize, (int) AccumuloConfiguration.getDefaultConfiguration().getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
+      this(bfw, blockSize, (int) AccumuloConfiguration.getDefaultConfiguration().getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX), null, null);
     }
 
-    public Writer(BlockFileWriter bfw, int blockSize, int indexBlockSize) throws IOException {
+    public Writer(BlockFileWriter bfw, int blockSize, int indexBlockSize, SamplerConfigurationImpl samplerConfig, Sampler sampler) throws IOException {
       this.blockSize = blockSize;
       this.indexBlockSize = indexBlockSize;
       this.fileWriter = bfw;
-      this.blockWriter = null;
       previousColumnFamilies = new HashSet<ByteSequence>();
+      this.samplerConfig = samplerConfig;
+      this.sampler = sampler;
     }
 
     @Override
@@ -329,10 +505,12 @@ public class RFile {
       ABlockWriter mba = fileWriter.prepareMetaBlock("RFile.index");
 
       mba.writeInt(RINDEX_MAGIC);
-      mba.writeInt(RINDEX_VER_7);
+      mba.writeInt(RINDEX_VER_8);
 
-      if (currentLocalityGroup != null)
+      if (currentLocalityGroup != null) {
         localityGroups.add(currentLocalityGroup);
+        sampleGroups.add(sampleLocalityGroup);
+      }
 
       mba.writeInt(localityGroups.size());
 
@@ -340,6 +518,18 @@ public class RFile {
         lc.write(mba);
       }
 
+      if (samplerConfig == null) {
+        mba.writeBoolean(false);
+      } else {
+        mba.writeBoolean(true);
+
+        for (LocalityGroupMetadata lc : sampleGroups) {
+          lc.write(mba);
+        }
+
+        samplerConfig.write(mba);
+      }
+
       mba.close();
       fileWriter.close();
       length = fileWriter.getLength();
@@ -355,8 +545,8 @@ public class RFile {
 
       dataClosed = true;
 
-      if (blockWriter != null) {
-        closeBlock(lastKeyInBlock, true);
+      if (lgWriter != null) {
+        lgWriter.close();
       }
     }
 
@@ -367,46 +557,7 @@ public class RFile {
         throw new IllegalStateException("Cannont append, data closed");
       }
 
-      if (key.compareTo(prevKey) < 0) {
-        throw new IllegalStateException("Keys appended out-of-order.  New key " + key + ", previous key " + prevKey);
-      }
-
-      currentLocalityGroup.updateColumnCount(key);
-
-      if (currentLocalityGroup.getFirstKey() == null) {
-        currentLocalityGroup.setFirstKey(key);
-      }
-
-      if (blockWriter == null) {
-        blockWriter = fileWriter.prepareDataBlock();
-      } else if (blockWriter.getRawSize() > blockSize) {
-        closeBlock(prevKey, false);
-        blockWriter = fileWriter.prepareDataBlock();
-      }
-
-      RelativeKey rk = new RelativeKey(lastKeyInBlock, key);
-
-      rk.write(blockWriter);
-      value.write(blockWriter);
-      entries++;
-
-      prevKey = new Key(key);
-      lastKeyInBlock = prevKey;
-
-    }
-
-    private void closeBlock(Key key, boolean lastBlock) throws IOException {
-      blockWriter.close();
-
-      if (lastBlock)
-        currentLocalityGroup.indexWriter.addLast(key, entries, blockWriter.getStartPos(), blockWriter.getCompressedSize(), blockWriter.getRawSize());
-      else
-        currentLocalityGroup.indexWriter.add(key, entries, blockWriter.getStartPos(), blockWriter.getCompressedSize(), blockWriter.getRawSize());
-
-      blockWriter = null;
-      lastKeyInBlock = null;
-      entries = 0;
-      nextBlock++;
+      lgWriter.append(key, value);
     }
 
     @Override
@@ -425,28 +576,35 @@ public class RFile {
         throw new IllegalStateException("Can not start anymore new locality groups after default locality group started");
       }
 
-      if (blockWriter != null) {
-        closeBlock(lastKeyInBlock, true);
+      if (lgWriter != null) {
+        lgWriter.close();
       }
 
       if (currentLocalityGroup != null) {
         localityGroups.add(currentLocalityGroup);
+        sampleGroups.add(sampleLocalityGroup);
       }
 
       if (columnFamilies == null) {
         startedDefaultLocalityGroup = true;
-        currentLocalityGroup = new LocalityGroupMetadata(nextBlock, previousColumnFamilies, indexBlockSize, fileWriter);
+        currentLocalityGroup = new LocalityGroupMetadata(previousColumnFamilies, indexBlockSize, fileWriter);
+        sampleLocalityGroup = new LocalityGroupMetadata(previousColumnFamilies, indexBlockSize, fileWriter);
       } else {
         if (!Collections.disjoint(columnFamilies, previousColumnFamilies)) {
           HashSet<ByteSequence> overlap = new HashSet<ByteSequence>(columnFamilies);
           overlap.retainAll(previousColumnFamilies);
           throw new IllegalArgumentException("Column families over lap with previous locality group : " + overlap);
         }
-        currentLocalityGroup = new LocalityGroupMetadata(name, columnFamilies, nextBlock, indexBlockSize, fileWriter);
+        currentLocalityGroup = new LocalityGroupMetadata(name, columnFamilies, indexBlockSize, fileWriter);
+        sampleLocalityGroup = new LocalityGroupMetadata(name, columnFamilies, indexBlockSize, fileWriter);
         previousColumnFamilies.addAll(columnFamilies);
       }
 
-      prevKey = new Key();
+      SampleLocalityGroupWriter sampleWriter = null;
+      if (sampler != null) {
+        sampleWriter = new SampleLocalityGroupWriter(new LocalityGroupWriter(fileWriter, blockSize, sampleLocalityGroup, null), sampler);
+      }
+      lgWriter = new LocalityGroupWriter(fileWriter, blockSize, currentLocalityGroup, sampleWriter);
     }
 
     @Override
@@ -616,8 +774,9 @@ public class RFile {
       if (columnFamilies.size() != 0 || inclusive)
         throw new IllegalArgumentException("I do not know how to filter column families");
 
-      if (interruptFlag != null && interruptFlag.get())
+      if (interruptFlag != null && interruptFlag.get()) {
         throw new IterationInterruptedException();
+      }
 
       try {
         _seek(range);
@@ -830,6 +989,11 @@ public class RFile {
     public void registerMetrics(MetricsGatherer<?> vmg) {
       metricsGatherer = vmg;
     }
+
+    @Override
+    public FileSKVIterator getSample(SamplerConfigurationImpl sampleConfig) {
+      throw new UnsupportedOperationException();
+    }
   }
 
   public static class Reader extends HeapIterator implements FileSKVIterator {
@@ -837,8 +1001,12 @@ public class RFile {
     private BlockFileReader reader;
 
     private ArrayList<LocalityGroupMetadata> localityGroups = new ArrayList<LocalityGroupMetadata>();
+    private ArrayList<LocalityGroupMetadata> sampleGroups = new ArrayList<LocalityGroupMetadata>();
+
+    private LocalityGroupReader currentReaders[];
+    private LocalityGroupReader readers[];
+    private LocalityGroupReader sampleReaders[];
 
-    private LocalityGroupReader lgReaders[];
     private HashSet<ByteSequence> nonDefaultColumnFamilies;
 
     private List<Reader> deepCopies;
@@ -846,6 +1014,10 @@ public class RFile {
 
     private AtomicBoolean interruptFlag;
 
+    private SamplerConfigurationImpl samplerConfig = null;
+
+    private int rfileVersion;
+
     public Reader(BlockFileReader rdr) throws IOException {
       this.reader = rdr;
 
@@ -853,14 +1025,15 @@ public class RFile {
       try {
         int magic = mb.readInt();
         int ver = mb.readInt();
+        rfileVersion = ver;
 
         if (magic != RINDEX_MAGIC)
           throw new IOException("Did not see expected magic number, saw " + magic);
-        if (ver != RINDEX_VER_7 && ver != RINDEX_VER_6 && ver != RINDEX_VER_4 && ver != RINDEX_VER_3)
+        if (ver != RINDEX_VER_8 && ver != RINDEX_VER_7 && ver != RINDEX_VER_6 && ver != RINDEX_VER_4 && ver != RINDEX_VER_3)
           throw new IOException("Did not see expected version, saw " + ver);
 
         int size = mb.readInt();
-        lgReaders = new LocalityGroupReader[size];
+        currentReaders = new LocalityGroupReader[size];
 
         deepCopies = new LinkedList<Reader>();
 
@@ -869,8 +1042,28 @@ public class RFile {
           lgm.readFields(mb);
           localityGroups.add(lgm);
 
-          lgReaders[i] = new LocalityGroupReader(reader, lgm, ver);
+          currentReaders[i] = new LocalityGroupReader(reader, lgm, ver);
+        }
+
+        readers = currentReaders;
+
+        if (ver == RINDEX_VER_8 && mb.readBoolean()) {
+          sampleReaders = new LocalityGroupReader[size];
+
+          for (int i = 0; i < size; i++) {
+            LocalityGroupMetadata lgm = new LocalityGroupMetadata(ver, rdr);
+            lgm.readFields(mb);
+            sampleGroups.add(lgm);
+
+            sampleReaders[i] = new LocalityGroupReader(reader, lgm, ver);
+          }
+
+          samplerConfig = new SamplerConfigurationImpl(mb);
+        } else {
+          sampleReaders = null;
+          samplerConfig = null;
         }
+
       } finally {
         mb.close();
       }
@@ -881,24 +1074,53 @@ public class RFile {
           nonDefaultColumnFamilies.addAll(lgm.columnFamilies.keySet());
       }
 
-      createHeap(lgReaders.length);
+      createHeap(currentReaders.length);
+    }
+
+    private Reader(Reader r, LocalityGroupReader sampleReaders[]) {
+      super(sampleReaders.length);
+      this.reader = r.reader;
+      this.nonDefaultColumnFamilies = r.nonDefaultColumnFamilies;
+      this.currentReaders = new LocalityGroupReader[sampleReaders.length];
+      this.deepCopies = r.deepCopies;
+      this.deepCopy = false;
+      this.readers = r.readers;
+      this.sampleReaders = r.sampleReaders;
+      this.samplerConfig = r.samplerConfig;
+      this.rfileVersion = r.rfileVersion;
+      for (int i = 0; i < sampleReaders.length; i++) {
+        this.currentReaders[i] = sampleReaders[i];
+        this.currentReaders[i].setInterruptFlag(r.interruptFlag);
+      }
     }
 
-    private Reader(Reader r) {
-      super(r.lgReaders.length);
+    private Reader(Reader r, boolean useSample) {
+      super(r.currentReaders.length);
       this.reader = r.reader;
       this.nonDefaultColumnFamilies = r.nonDefaultColumnFamilies;
-      this.lgReaders = new LocalityGroupReader[r.lgReaders.length];
+      this.currentReaders = new LocalityGroupReader[r.currentReaders.length];
       this.deepCopies = r.deepCopies;
       this.deepCopy = true;
-      for (int i = 0; i < lgReaders.length; i++) {
-        this.lgReaders[i] = new LocalityGroupReader(r.lgReaders[i]);
-        this.lgReaders[i].setInterruptFlag(r.interruptFlag);
+      this.samplerConfig = r.samplerConfig;
+      this.rfileVersion = r.rfileVersion;
+      this.readers = r.readers;
+      this.sampleReaders = r.sampleReaders;
+
+      for (int i = 0; i < r.readers.length; i++) {
+        if (useSample) {
+          this.currentReaders[i] = new LocalityGroupReader(r.sampleReaders[i]);
+          this.currentReaders[i].setInterruptFlag(r.interruptFlag);
+        } else {
+          this.currentReaders[i] = new LocalityGroupReader(r.readers[i]);
+          this.currentReaders[i].setInterruptFlag(r.interruptFlag);
+        }
+
       }
+
     }
 
     private void closeLocalityGroupReaders() {
-      for (LocalityGroupReader lgr : lgReaders) {
+      for (LocalityGroupReader lgr : currentReaders) {
         try {
           lgr.close();
         } catch (IOException e) {
@@ -926,6 +1148,16 @@ public class RFile {
       closeDeepCopies();
       closeLocalityGroupReaders();
 
+      if (sampleReaders != null) {
+        for (LocalityGroupReader lgr : sampleReaders) {
+          try {
+            lgr.close();
+          } catch (IOException e) {
+            log.warn("Errored out attempting to close LocalityGroupReader.", e);
+          }
+        }
+      }
+
       try {
         reader.close();
       } finally {
@@ -937,17 +1169,17 @@ public class RFile {
 
     @Override
     public Key getFirstKey() throws IOException {
-      if (lgReaders.length == 0) {
+      if (currentReaders.length == 0) {
         return null;
       }
 
       Key minKey = null;
 
-      for (int i = 0; i < lgReaders.length; i++) {
+      for (int i = 0; i < currentReaders.length; i++) {
         if (minKey == null) {
-          minKey = lgReaders[i].getFirstKey();
+          minKey = currentReaders[i].getFirstKey();
         } else {
-          Key firstKey = lgReaders[i].getFirstKey();
+          Key firstKey = currentReaders[i].getFirstKey();
           if (firstKey != null && firstKey.compareTo(minKey) < 0)
             minKey = firstKey;
         }
@@ -958,17 +1190,17 @@ public class RFile {
 
     @Override
     public Key getLastKey() throws IOException {
-      if (lgReaders.length == 0) {
+      if (currentReaders.length == 0) {
         return null;
       }
 
       Key maxKey = null;
 
-      for (int i = 0; i < lgReaders.length; i++) {
+      for (int i = 0; i < currentReaders.length; i++) {
         if (maxKey == null) {
-          maxKey = lgReaders[i].getLastKey();
+          maxKey = currentReaders[i].getLastKey();
         } else {
-          Key lastKey = lgReaders[i].getLastKey();
+          Key lastKey = currentReaders[i].getLastKey();
           if (lastKey != null && lastKey.compareTo(maxKey) > 0)
             maxKey = lastKey;
         }
@@ -988,10 +1220,26 @@ public class RFile {
 
     @Override
     public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
-      Reader copy = new Reader(this);
-      copy.setInterruptFlagInternal(interruptFlag);
-      deepCopies.add(copy);
-      return copy;
+      if (env != null && env.isSamplingEnabled()) {
+        SamplerConfiguration sc = env.getSamplerConfiguration();
+        if (sc == null) {
+          throw new SampleNotPresentException();
+        }
+
+        if (this.samplerConfig != null && this.samplerConfig.equals(new SamplerConfigurationImpl(sc))) {
+          Reader copy = new Reader(this, true);
+          copy.setInterruptFlagInternal(interruptFlag);
+          deepCopies.add(copy);
+          return copy;
+        } else {
+          throw new SampleNotPresentException();
+        }
+      } else {
+        Reader copy = new Reader(this, false);
+        copy.setInterruptFlagInternal(interruptFlag);
+        deepCopies.add(copy);
+        return copy;
+      }
     }
 
     @Override
@@ -1027,14 +1275,20 @@ public class RFile {
      */
     public void registerMetrics(MetricsGatherer<?> vmg) {
       vmg.init(getLocalityGroupCF());
-      for (LocalityGroupReader lgr : lgReaders) {
+      for (LocalityGroupReader lgr : currentReaders) {
         lgr.registerMetrics(vmg);
       }
+
+      if (sampleReaders != null) {
+        for (LocalityGroupReader lgr : sampleReaders) {
+          lgr.registerMetrics(vmg);
+        }
+      }
     }
 
     @Override
     public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
-      numLGSeeked = LocalityGroupIterator.seek(this, lgReaders, nonDefaultColumnFamilies, range, columnFamilies, inclusive);
+      numLGSeeked = LocalityGroupIterator.seek(this, currentReaders, nonDefaultColumnFamilies, range, columnFamilies, inclusive);
     }
 
     int getNumLocalityGroupsSeeked() {
@@ -1045,16 +1299,53 @@ public class RFile {
 
       ArrayList<Iterator<IndexEntry>> indexes = new ArrayList<Iterator<IndexEntry>>();
 
-      for (LocalityGroupReader lgr : lgReaders) {
+      for (LocalityGroupReader lgr : currentReaders) {
         indexes.add(lgr.getIndex());
       }
 
       return new MultiIndexIterator(this, indexes);
     }
 
+    @Override
+    public FileSKVIterator getSample(SamplerConfigurationImpl sampleConfig) {
+      Preconditions.checkNotNull(sampleConfig);
+
+      if (this.samplerConfig != null && this.samplerConfig.equals(sampleConfig)) {
+        Reader copy = new Reader(this, sampleReaders);
+        copy.setInterruptFlagInternal(interruptFlag);
+        return copy;
+      }
+
+      return null;
+    }
+
+    // only visible for printinfo
+    FileSKVIterator getSample() {
+      if (samplerConfig == null)
+        return null;
+      return getSample(this.samplerConfig);
+    }
+
     public void printInfo() throws IOException {
+
+      System.out.printf("%-24s : %d\n", "RFile Version", rfileVersion);
+      System.out.println();
+
       for (LocalityGroupMetadata lgm : localityGroups) {
-        lgm.printInfo();
+        lgm.printInfo(false);
+      }
+
+      if (sampleGroups.size() > 0) {
+
+        System.out.println();
+        System.out.printf("%-24s :\n", "Sample Configuration");
+        System.out.printf("\t%-22s : %s\n", "Sampler class ", samplerConfig.getClassName());
+        System.out.printf("\t%-22s : %s\n", "Sampler options ", samplerConfig.getOptions());
+        System.out.println();
+
+        for (LocalityGroupMetadata lgm : sampleGroups) {
+          lgm.printInfo(true);
+        }
       }
     }
 
@@ -1071,7 +1362,7 @@ public class RFile {
 
     private void setInterruptFlagInternal(AtomicBoolean flag) {
       this.interruptFlag = flag;
-      for (LocalityGroupReader lgr : lgReaders) {
+      for (LocalityGroupReader lgr : currentReaders) {
         lgr.setInterruptFlag(interruptFlag);
       }
     }

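Pulling the RFile changes together: the version 8 metadata block appends sample information after the regular locality groups. A sketch of the layout, reconstructed from the Writer.close() and Reader constructor above (field names are descriptive, not literal):

  RFile.index meta block, version 8:
    RINDEX_MAGIC, RINDEX_VER_8
    localityGroupCount
    localityGroupCount x LocalityGroupMetadata     (data)
    hasSample : boolean
    localityGroupCount x LocalityGroupMetadata     (sample, present only when hasSample)
    SamplerConfigurationImpl                       (present only when hasSample)

Because the sampler class and options are persisted with the file, Reader.getSample() hands back the sample locality groups only when the stored configuration equals the requested one, and deepCopy() throws SampleNotPresentException when a sampling-enabled environment asks for a sample the file cannot provide.
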
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java
index 088abfe..17e8e96 100644
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java
@@ -33,6 +33,9 @@ import org.apache.accumulo.core.file.blockfile.cache.BlockCache;
 import org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile;
 import org.apache.accumulo.core.file.rfile.RFile.Reader;
 import org.apache.accumulo.core.file.rfile.RFile.Writer;
+import org.apache.accumulo.core.sample.Sampler;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
+import org.apache.accumulo.core.sample.impl.SamplerFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -123,8 +126,15 @@ public class RFileOperations extends FileOperations {
     long blockSize = acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE);
     long indexBlockSize = acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX);
 
+    SamplerConfigurationImpl samplerConfig = SamplerConfigurationImpl.newSamplerConfig(acuconf);
+    Sampler sampler = null;
+
+    if (samplerConfig != null) {
+      sampler = SamplerFactory.newSampler(samplerConfig, acuconf);
+    }
+
     CachableBlockFile.Writer _cbw = new CachableBlockFile.Writer(fs.create(new Path(file), false, bufferSize, (short) rep, block), compression, conf, acuconf);
-    Writer writer = new RFile.Writer(_cbw, (int) blockSize, (int) indexBlockSize);
+    Writer writer = new RFile.Writer(_cbw, (int) blockSize, (int) indexBlockSize, samplerConfig, sampler);
     return writer;
   }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/iterators/IteratorEnvironment.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/IteratorEnvironment.java b/core/src/main/java/org/apache/accumulo/core/iterators/IteratorEnvironment.java
index 5a53e93..5dbafa6 100644
--- a/core/src/main/java/org/apache/accumulo/core/iterators/IteratorEnvironment.java
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/IteratorEnvironment.java
@@ -18,6 +18,8 @@ package org.apache.accumulo.core.iterators;
 
 import java.io.IOException;
 
+import org.apache.accumulo.core.client.SampleNotPresentException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
@@ -37,4 +39,52 @@ public interface IteratorEnvironment {
   void registerSideChannel(SortedKeyValueIterator<Key,Value> iter);
 
   Authorizations getAuthorizations();
+
+  /**
+   * Returns a new iterator environment object that can be used to create deep copies over sample data. The new object created will use the current sampling
+   * configuration for the table. The existing iterator environment object will not be modified.
+   *
+   * <p>
+   * Since sample data could be created in many different ways, a good practice for an iterator is to verify the sampling configuration is as expected.
+   *
+   * <p>
+   *
+   * <pre>
+   * <code>
+   *   class MyIter implements SortedKeyValueIterator&lt;Key,Value&gt; {
+   *     SortedKeyValueIterator&lt;Key,Value&gt; source;
+   *     SortedKeyValueIterator&lt;Key,Value&gt; sampleIter;
+   *     &#64;Override
+   *     void init(SortedKeyValueIterator&lt;Key,Value&gt; source, Map&lt;String,String&gt; options, IteratorEnvironment env) {
+   *       IteratorEnvironment sampleEnv = env.cloneWithSamplingEnabled();
+   *       //do some sanity checks on sampling config
+   *       validateSamplingConfiguration(sampleEnv.getSamplerConfiguration());
+   *       sampleIter = source.deepCopy(sampleEnv);
+   *       this.source = source;
+   *     }
+   *   }
+   * </code>
+   * </pre>
+   *
+   * @throws SampleNotPresentException
+   *           when sampling is not configured for table.
+   * @since 1.8.0
+   */
+  IteratorEnvironment cloneWithSamplingEnabled();
+
+  /**
+   * There are at least two conditions under which sampling will be enabled for an environment. One condition is when sampling is enabled for the scan that
+   * starts everything. Another is a deep copy created with an environment obtained by calling {@link #cloneWithSamplingEnabled()}.
+   *
+   * @return true if sampling is enabled for this environment.
+   * @since 1.8.0
+   */
+  boolean isSamplingEnabled();
+
+  /**
+   *
+   * @return the sampling configuration if sampling is enabled for the environment, otherwise null.
+   * @since 1.8.0
+   */
+  SamplerConfiguration getSamplerConfiguration();
 }

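The two query methods pair naturally: code that honors sampling guards on one before trusting the other. A minimal fragment, assuming an in-scope IteratorEnvironment named env:

  if (env.isSamplingEnabled()) {
    SamplerConfiguration sc = env.getSamplerConfiguration(); // non-null when sampling is enabled
    // verify sc matches what this iterator expects before using sample data
  }
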
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/iterators/SortedMapIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/SortedMapIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/SortedMapIterator.java
index 3999b6f..25c010d 100644
--- a/core/src/main/java/org/apache/accumulo/core/iterators/SortedMapIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/SortedMapIterator.java
@@ -24,6 +24,7 @@ import java.util.Map.Entry;
 import java.util.SortedMap;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
@@ -53,6 +54,9 @@ public class SortedMapIterator implements InterruptibleIterator {
 
   @Override
   public SortedMapIterator deepCopy(IteratorEnvironment env) {
+    if (env != null && env.isSamplingEnabled()) {
+      throw new SampleNotPresentException();
+    }
     return new SortedMapIterator(map, interruptFlag);
   }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/iterators/WrappingIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/WrappingIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/WrappingIterator.java
index 7723ef1..5b37b30 100644
--- a/core/src/main/java/org/apache/accumulo/core/iterators/WrappingIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/WrappingIterator.java
@@ -56,8 +56,6 @@ public abstract class WrappingIterator implements SortedKeyValueIterator<Key,Val
 
   @Override
   public Key getTopKey() {
-    if (source == null)
-      throw new IllegalStateException("no source set");
     if (seenSeek == false)
       throw new IllegalStateException("never been seeked");
     return getSource().getTopKey();
@@ -65,8 +63,6 @@ public abstract class WrappingIterator implements SortedKeyValueIterator<Key,Val
 
   @Override
   public Value getTopValue() {
-    if (source == null)
-      throw new IllegalStateException("no source set");
     if (seenSeek == false)
       throw new IllegalStateException("never been seeked");
     return getSource().getTopValue();
@@ -74,8 +70,6 @@ public abstract class WrappingIterator implements SortedKeyValueIterator<Key,Val
 
   @Override
   public boolean hasTop() {
-    if (source == null)
-      throw new IllegalStateException("no source set");
     if (seenSeek == false)
       throw new IllegalStateException("never been seeked");
     return getSource().hasTop();
@@ -89,8 +83,6 @@ public abstract class WrappingIterator implements SortedKeyValueIterator<Key,Val
 
   @Override
   public void next() throws IOException {
-    if (source == null)
-      throw new IllegalStateException("no source set");
     if (seenSeek == false)
       throw new IllegalStateException("never been seeked");
     getSource().next();

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/iterators/system/EmptyIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/system/EmptyIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/system/EmptyIterator.java
new file mode 100644
index 0000000..b791eb1
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/system/EmptyIterator.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.iterators.system;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+
+public class EmptyIterator implements InterruptibleIterator {
+
+  public static final EmptyIterator EMPTY_ITERATOR = new EmptyIterator();
+
+  @Override
+  public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {}
+
+  @Override
+  public boolean hasTop() {
+    return false;
+  }
+
+  @Override
+  public void next() throws IOException {
+    // nothing should call this since hasTop always returns false
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {}
+
+  @Override
+  public Key getTopKey() {
+    // nothing should call this since hasTop always returns false
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Value getTopValue() {
+    // nothing should call this since hasTop always returns false
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
+    return EMPTY_ITERATOR;
+  }
+
+  @Override
+  public void setInterruptFlag(AtomicBoolean flag) {}
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/iterators/system/MapFileIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/system/MapFileIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/system/MapFileIterator.java
index 9d59570..f9f0600 100644
--- a/core/src/main/java/org/apache/accumulo/core/iterators/system/MapFileIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/system/MapFileIterator.java
@@ -33,6 +33,7 @@ import org.apache.accumulo.core.file.map.MapFileUtil;
 import org.apache.accumulo.core.iterators.IterationInterruptedException;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -154,4 +155,9 @@ public class MapFileIterator implements FileSKVIterator {
   public void close() throws IOException {
     reader.close();
   }
+
+  @Override
+  public FileSKVIterator getSample(SamplerConfigurationImpl sampleConfig) {
+    return null;
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/iterators/system/SampleIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/system/SampleIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/system/SampleIterator.java
new file mode 100644
index 0000000..aedcdba
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/system/SampleIterator.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.iterators.system;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.Filter;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.sample.RowSampler;
+import org.apache.accumulo.core.sample.Sampler;
+
+public class SampleIterator extends Filter {
+
+  private Sampler sampler = new RowSampler();
+
+  public SampleIterator(SortedKeyValueIterator<Key,Value> iter, Sampler sampler) {
+    setSource(iter);
+    this.sampler = sampler;
+  }
+
+  @Override
+  public boolean accept(Key k, Value v) {
+    return sampler.accept(k);
+  }
+
+  @Override
+  public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
+    return new SampleIterator(getSource().deepCopy(env), sampler);
+  }
+}

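SampleIterator filters any source down to the keys a Sampler accepts, so sample scans can be served from data that has no precomputed sample. A self-contained sketch using only classes added or touched by this commit; the class name and test data are fabricated for illustration:

  import java.util.Collections;
  import java.util.TreeMap;

  import org.apache.accumulo.core.client.admin.SamplerConfiguration;
  import org.apache.accumulo.core.data.ByteSequence;
  import org.apache.accumulo.core.data.Key;
  import org.apache.accumulo.core.data.Range;
  import org.apache.accumulo.core.data.Value;
  import org.apache.accumulo.core.iterators.SortedMapIterator;
  import org.apache.accumulo.core.iterators.system.SampleIterator;
  import org.apache.accumulo.core.sample.RowSampler;
  import org.apache.hadoop.io.Text;

  import com.google.common.collect.ImmutableMap;

  public class SampleIteratorSketch {
    public static void main(String[] args) throws Exception {
      TreeMap<Key,Value> data = new TreeMap<>();
      for (int i = 0; i < 10000; i++)
        data.put(new Key(new Text(String.format("row%05d", i))), new Value(new byte[0]));

      // Initialize a sampler the same way a table configuration would.
      RowSampler sampler = new RowSampler();
      SamplerConfiguration sc = new SamplerConfiguration(RowSampler.class.getName());
      sc.setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "101"));
      sampler.init(sc);

      // Filter the full data down to the sample on the fly.
      SampleIterator sample = new SampleIterator(new SortedMapIterator(data), sampler);
      sample.seek(new Range(), Collections.<ByteSequence>emptySet(), false);

      int count = 0;
      while (sample.hasTop()) {
        count++;
        sample.next();
      }
      System.out.println(count + " of " + data.size() + " keys in the sample");
    }
  }
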
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/iterators/system/SequenceFileIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/system/SequenceFileIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/system/SequenceFileIterator.java
index 8710acd..8ea3800 100644
--- a/core/src/main/java/org/apache/accumulo/core/iterators/system/SequenceFileIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/iterators/system/SequenceFileIterator.java
@@ -29,6 +29,7 @@ import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.file.FileSKVIterator;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.SequenceFile.Reader;
 
@@ -126,4 +127,9 @@ public class SequenceFileIterator implements FileSKVIterator {
   public void setInterruptFlag(AtomicBoolean flag) {
     throw new UnsupportedOperationException();
   }
+
+  @Override
+  public FileSKVIterator getSample(SamplerConfigurationImpl sampleConfig) {
+    throw new UnsupportedOperationException();
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/sample/AbstractHashSampler.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/sample/AbstractHashSampler.java b/core/src/main/java/org/apache/accumulo/core/sample/AbstractHashSampler.java
new file mode 100644
index 0000000..ae2b951
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/sample/AbstractHashSampler.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.sample;
+
+import java.util.Set;
+
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.data.Key;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+/**
+ * A base class that can be used to create Samplers based on hashing. This class offers consistent options for configuring the hash function. The subclass
+ * decides which parts of the key to hash.
+ *
+ * <p>
+ * This class supports two options passed into {@link #init(SamplerConfiguration)}. One option is {@code hasher}, which specifies a hashing algorithm. Valid
+ * values for this option are {@code md5}, {@code sha1}, and {@code murmur3_32}. If you are not sure, then choose {@code murmur3_32}.
+ *
+ * <p>
+ * The second option is {@code modulus} which can have any positive integer as a value.
+ *
+ * <p>
+ * Any data where {@code hash(data) % modulus == 0} will be selected for the sample.
+ *
+ * @since 1.8.0
+ */
+
+public abstract class AbstractHashSampler implements Sampler {
+
+  private HashFunction hashFunction;
+  private int modulus;
+
+  private static final Set<String> VALID_OPTIONS = ImmutableSet.of("hasher", "modulus");
+
+  /**
+   * Subclasses with options should override this method and return true if the option is valid for the subclass or if {@code super.isValidOption(opt)} returns
+   * true.
+   */
+
+  protected boolean isValidOption(String option) {
+    return VALID_OPTIONS.contains(option);
+  }
+
+  /**
+   * Subclasses with options should override this method and call {@code super.init(config)}.
+   */
+
+  @Override
+  public void init(SamplerConfiguration config) {
+    String hasherOpt = config.getOptions().get("hasher");
+    String modulusOpt = config.getOptions().get("modulus");
+
+    Preconditions.checkNotNull(hasherOpt, "Hasher not specified");
+    Preconditions.checkNotNull(modulusOpt, "Modulus not specified");
+
+    for (String option : config.getOptions().keySet()) {
+      Preconditions.checkArgument(isValidOption(option), "Unknown option : %s", option);
+    }
+
+    switch (hasherOpt) {
+      case "murmur3_32":
+        hashFunction = Hashing.murmur3_32();
+        break;
+      case "md5":
+        hashFunction = Hashing.md5();
+        break;
+      case "sha1":
+        hashFunction = Hashing.sha1();
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown hasher " + hasherOpt);
+    }
+
+    modulus = Integer.parseInt(modulusOpt);
+  }
+
+  /**
+   * Subclass must override this method and hash some portion of the key.
+   */
+  protected abstract HashCode hash(HashFunction hashFunction, Key k);
+
+  @Override
+  public boolean accept(Key k) {
+    return hash(hashFunction, k).asInt() % modulus == 0;
+  }
+}

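Concretely, with RowSampler (a subclass of this class that hashes only the row) and a modulus of 1009, a key is kept when hash(row) % 1009 == 0, selecting roughly one row in a thousand. A hedged configuration sketch; attaching the configuration to a table happens elsewhere:

  import org.apache.accumulo.core.client.admin.SamplerConfiguration;
  import org.apache.accumulo.core.sample.RowSampler;

  import com.google.common.collect.ImmutableMap;

  // hasher and modulus are the two options AbstractHashSampler consumes.
  SamplerConfiguration sc = new SamplerConfiguration(RowSampler.class.getName())
      .setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "1009"));
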
http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/sample/RowColumnSampler.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/sample/RowColumnSampler.java b/core/src/main/java/org/apache/accumulo/core/sample/RowColumnSampler.java
new file mode 100644
index 0000000..ad68cf6
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/sample/RowColumnSampler.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.sample;
+
+import java.util.Set;
+
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hasher;
+
+/**
+ * This sampler can hash any subset of a Key's fields. The fields that are hashed for the sample are determined by the configuration options passed to
+ * {@link #init(SamplerConfiguration)}. The following key/value pairs are valid options.
+ *
+ * <UL>
+ * <li>row=true|false
+ * <li>family=true|false
+ * <li>qualifier=true|false
+ * <li>visibility=true|false
+ * </UL>
+ *
+ * <p>
+ * If not specified in the options, fields default to false.
+ *
+ * <p>
+ * To determine which options are valid for hashing, see {@link AbstractHashSampler}.
+ *
+ * <p>
+ * To configure Accumulo to generate sample data on one thousandth of the column qualifiers, the following SamplerConfiguration could be created and used to
+ * configure a table.
+ *
+ * <p>
+ * {@code new SamplerConfiguration(RowColumnSampler.class.getName()).setOptions(ImmutableMap.of("hasher","murmur3_32","modulus","1009","qualifier","true"))}
+ *
+ * <p>
+ * With this configuration, if a column qualifier is selected then all key values containing that column qualifier will end up in the sample data.
+ *
+ * @since 1.8.0
+ */
+
+public class RowColumnSampler extends AbstractHashSampler {
+
+  private boolean row = true;
+  private boolean family = true;
+  private boolean qualifier = true;
+  private boolean visibility = true;
+
+  private static final Set<String> VALID_OPTIONS = ImmutableSet.of("row", "family", "qualifier", "visibility");
+
+  private boolean hashField(SamplerConfiguration config, String field) {
+    String optValue = config.getOptions().get(field);
+    if (optValue != null) {
+      return Boolean.parseBoolean(optValue);
+    }
+
+    return false;
+  }
+
+  @Override
+  protected boolean isValidOption(String option) {
+    return super.isValidOption(option) || VALID_OPTIONS.contains(option);
+  }
+
+  @Override
+  public void init(SamplerConfiguration config) {
+    super.init(config);
+
+    row = hashField(config, "row");
+    family = hashField(config, "family");
+    qualifier = hashField(config, "qualifier");
+    visibility = hashField(config, "visibility");
+
+    if (!row && !family && !qualifier && !visibility) {
+      throw new IllegalStateException("Must hash at least one key field");
+    }
+  }
+
+  private void putByteSequence(ByteSequence data, Hasher hasher) {
+    hasher.putBytes(data.getBackingArray(), data.offset(), data.length());
+  }
+
+  @Override
+  protected HashCode hash(HashFunction hashFunction, Key k) {
+    Hasher hasher = hashFunction.newHasher();
+
+    if (row) {
+      putByteSequence(k.getRowData(), hasher);
+    }
+
+    if (family) {
+      putByteSequence(k.getColumnFamilyData(), hasher);
+    }
+
+    if (qualifier) {
+      putByteSequence(k.getColumnQualifierData(), hasher);
+    }
+
+    if (visibility) {
+      putByteSequence(k.getColumnVisibilityData(), hasher);
+    }
+
+    return hasher.hash();
+  }
+}
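
To make the qualifier-only selection rule concrete, here is a hedged sketch exercising the sampler directly; the demo class and its use outside a table are illustrative only.

    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.sample.RowColumnSampler;
    import org.apache.hadoop.io.Text;

    import com.google.common.collect.ImmutableMap;

    public class RowColumnSamplerDemo { // hypothetical demo class
      public static void main(String[] args) {
        SamplerConfiguration conf = new SamplerConfiguration(RowColumnSampler.class.getName())
            .setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "1009", "qualifier", "true"));

        RowColumnSampler sampler = new RowColumnSampler();
        sampler.init(conf);

        // Only the qualifier is hashed, so two keys that differ only in row
        // are either both selected for the sample or both rejected.
        boolean a = sampler.accept(new Key(new Text("row1"), new Text("fam"), new Text("qualA")));
        boolean b = sampler.accept(new Key(new Text("row2"), new Text("fam"), new Text("qualA")));
        System.out.println(a == b); // always prints true
      }
    }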

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/sample/RowSampler.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/sample/RowSampler.java b/core/src/main/java/org/apache/accumulo/core/sample/RowSampler.java
new file mode 100644
index 0000000..8690a1c
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/sample/RowSampler.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.sample;
+
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+
+/**
+ * Builds a sample based on entire rows. If a row is selected for the sample, then all of its columns will be included.
+ *
+ * <p>
+ * To determine which options are valid for hashing, see {@link AbstractHashSampler}. This class offers no additional options; it always hashes the row.
+ *
+ * <p>
+ * To configure Accumulo to generate sample data on one thousandth of the rows, the following SamplerConfiguration could be created and used to configure a
+ * table.
+ *
+ * <p>
+ * {@code new SamplerConfiguration(RowSampler.class.getName()).setOptions(ImmutableMap.of("hasher","murmur3_32","modulus","1009"))}
+ *
+ * @since 1.8.0
+ */
+
+public class RowSampler extends AbstractHashSampler {
+
+  @Override
+  protected HashCode hash(HashFunction hashFunction, Key k) {
+    ByteSequence row = k.getRowData();
+    return hashFunction.hashBytes(row.getBackingArray(), row.offset(), row.length());
+  }
+}
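
A hedged sketch of enabling this sampler on a table follows; it assumes the TableOperations.setSamplerConfiguration method introduced elsewhere in this change and a Connector already in hand.

    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.sample.RowSampler;

    import com.google.common.collect.ImmutableMap;

    public class EnableSampling { // hypothetical helper class
      // Sketch: sample roughly one in 1009 rows of an existing table.
      static void enableRowSampling(Connector conn, String tableName) throws Exception {
        SamplerConfiguration sc = new SamplerConfiguration(RowSampler.class.getName())
            .setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "1009"));
        conn.tableOperations().setSamplerConfiguration(tableName, sc);
      }
    }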

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/sample/Sampler.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/sample/Sampler.java b/core/src/main/java/org/apache/accumulo/core/sample/Sampler.java
new file mode 100644
index 0000000..64adeec
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/sample/Sampler.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.sample;
+
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.data.Key;
+
+/**
+ * A function that decides which key values are stored in a table's sample. As Accumulo compacts data and creates rfiles, it uses a Sampler to decide what to
+ * store in each rfile's sample section. The class name of the Sampler and the Sampler's configuration are stored in each rfile. A scan of a table's sample will
+ * only succeed if all rfiles were created with the same sampler and sampler configuration.
+ *
+ * <p>
+ * Since the decisions a Sampler makes are persisted, the behavior of a Sampler for a given configuration should always be the same. One way to offer new
+ * behavior is to add new options, while still supporting the old behavior with a Sampler's existing options.
+ *
+ * <p>
+ * Ideally a sampler that selects a Key k1 would also select updates for k1. For example, if a Sampler selects
+ * {@code row='000989' family='name' qualifier='last' visibility='ADMIN' time=9 value='Doe'}, it would be nice if it also selected
+ * {@code row='000989' family='name' qualifier='last' visibility='ADMIN' time=20 value='Dough'}. Using a hash and modulo on the key fields is a good way to
+ * accomplish this, and {@link AbstractHashSampler} provides a good basis for implementation.
+ *
+ * @since 1.8.0
+ */
+
+public interface Sampler {
+
+  /**
+   * An implementation of Sampler must have a no-arg constructor. After construction, this method is called once to initialize the sampler before it is used.
+   *
+   * @param config
+   *          Configuration options for a sampler.
+   */
+  void init(SamplerConfiguration config);
+
+  /**
+   * @param k
+   *          A key that was written to an rfile.
+   * @return True if the key (and its associated value) should be stored in the rfile's sample. Return false if it should not be included.
+   */
+  boolean accept(Key k);
+}
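
Although AbstractHashSampler is the recommended basis, the interface can also be implemented directly. The hypothetical sketch below only illustrates the contract: a no-arg constructor, and an accept() decision that depends solely on the key, so behavior stays stable across compactions.

    import org.apache.accumulo.core.client.admin.SamplerConfiguration;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.sample.Sampler;

    // Hypothetical sampler (not part of this commit), shown only to
    // illustrate the contract described above.
    public class EvenRowLengthSampler implements Sampler {

      @Override
      public void init(SamplerConfiguration config) {
        // no options needed for this illustration
      }

      @Override
      public boolean accept(Key k) {
        // Every update to a given row gets the same decision.
        return k.getRowData().length() % 2 == 0;
      }
    }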

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerConfigurationImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerConfigurationImpl.java b/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerConfigurationImpl.java
new file mode 100644
index 0000000..348def4
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerConfigurationImpl.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.sample.impl;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.tabletserver.thrift.TSamplerConfiguration;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.hadoop.io.Writable;
+
+public class SamplerConfigurationImpl implements Writable {
+  private String className;
+  private Map<String,String> options;
+
+  public SamplerConfigurationImpl(DataInput in) throws IOException {
+    readFields(in);
+  }
+
+  public SamplerConfigurationImpl(SamplerConfiguration sc) {
+    this.className = sc.getSamplerClassName();
+    this.options = new HashMap<>(sc.getOptions());
+  }
+
+  public SamplerConfigurationImpl(String className, Map<String,String> options) {
+    this.className = className;
+    this.options = options;
+  }
+
+  public SamplerConfigurationImpl() {}
+
+  public String getClassName() {
+    return className;
+  }
+
+  public Map<String,String> getOptions() {
+    return Collections.unmodifiableMap(options);
+  }
+
+  @Override
+  public int hashCode() {
+    return 31 * className.hashCode() + options.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof SamplerConfigurationImpl) {
+      SamplerConfigurationImpl osc = (SamplerConfigurationImpl) o;
+
+      return className.equals(osc.className) && options.equals(osc.options);
+    }
+
+    return false;
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    // The Writable serialization methods for this class are called by RFile and therefore must be very stable. An alternative way to serialize this class is
+    // to use Thrift. That was not used here in order to avoid making RFile depend on Thrift.
+
+    // version byte, checked by readFields
+    out.write(1);
+
+    out.writeUTF(className);
+
+    out.writeInt(options.size());
+
+    for (Entry<String,String> entry : options.entrySet()) {
+      out.writeUTF(entry.getKey());
+      out.writeUTF(entry.getValue());
+    }
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    int version = in.readByte();
+
+    if (version != 1) {
+      throw new IllegalArgumentException("Unexpected version " + version);
+    }
+
+    className = in.readUTF();
+
+    options = new HashMap<>();
+
+    int num = in.readInt();
+
+    for (int i = 0; i < num; i++) {
+      String key = in.readUTF();
+      String val = in.readUTF();
+      options.put(key, val);
+    }
+  }
+
+  public SamplerConfiguration toSamplerConfiguration() {
+    SamplerConfiguration sc = new SamplerConfiguration(className);
+    sc.setOptions(options);
+    return sc;
+  }
+
+  public List<Pair<String,String>> toTableProperties() {
+    ArrayList<Pair<String,String>> props = new ArrayList<>();
+
+    for (Entry<String,String> entry : options.entrySet()) {
+      props.add(new Pair<String,String>(Property.TABLE_SAMPLER_OPTS.getKey() + entry.getKey(), entry.getValue()));
+    }
+
+    // intentionally added last, so it's set last
+    props.add(new Pair<String,String>(Property.TABLE_SAMPLER.getKey(), className));
+
+    return props;
+  }
+
+  public Map<String,String> toTablePropertiesMap() {
+    LinkedHashMap<String,String> propsMap = new LinkedHashMap<>();
+    for (Pair<String,String> pair : toTableProperties()) {
+      propsMap.put(pair.getFirst(), pair.getSecond());
+    }
+
+    return propsMap;
+  }
+
+  public static SamplerConfigurationImpl newSamplerConfig(AccumuloConfiguration acuconf) {
+    String className = acuconf.get(Property.TABLE_SAMPLER);
+
+    if (className == null || className.equals("")) {
+      return null;
+    }
+
+    Map<String,String> rawOptions = acuconf.getAllPropertiesWithPrefix(Property.TABLE_SAMPLER_OPTS);
+    Map<String,String> options = new HashMap<>();
+
+    for (Entry<String,String> entry : rawOptions.entrySet()) {
+      String key = entry.getKey().substring(Property.TABLE_SAMPLER_OPTS.getKey().length());
+      options.put(key, entry.getValue());
+    }
+
+    return new SamplerConfigurationImpl(className, options);
+  }
+
+  @Override
+  public String toString() {
+    return className + " " + options;
+  }
+
+  public static TSamplerConfiguration toThrift(SamplerConfiguration samplerConfig) {
+    if (samplerConfig == null)
+      return null;
+    return new TSamplerConfiguration(samplerConfig.getSamplerClassName(), samplerConfig.getOptions());
+  }
+
+  public static SamplerConfiguration fromThrift(TSamplerConfiguration tsc) {
+    if (tsc == null)
+      return null;
+    return new SamplerConfiguration(tsc.getClassName()).setOptions(tsc.getOptions());
+  }
+
+}
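
For a sense of the property mapping, the following sketch prints the table properties produced for a RowSampler configuration. The table.sampler and table.sampler.opt. key strings shown in the comment are assumptions; the authoritative values are defined by Property elsewhere in this change.

    import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;

    import com.google.common.collect.ImmutableMap;

    public class SamplerPropsDemo { // hypothetical demo class
      public static void main(String[] args) {
        SamplerConfigurationImpl impl = new SamplerConfigurationImpl(
            "org.apache.accumulo.core.sample.RowSampler",
            ImmutableMap.of("hasher", "murmur3_32", "modulus", "1009"));

        // Option properties come first and the sampler class last, matching
        // toTableProperties(); with the assumed key prefixes this prints roughly:
        // {table.sampler.opt.hasher=murmur3_32, table.sampler.opt.modulus=1009,
        //  table.sampler=org.apache.accumulo.core.sample.RowSampler}
        System.out.println(impl.toTablePropertiesMap());
      }
    }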

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerFactory.java b/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerFactory.java
new file mode 100644
index 0000000..3f11fbe
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerFactory.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.sample.impl;
+
+import java.io.IOException;
+
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.sample.Sampler;
+import org.apache.accumulo.start.classloader.vfs.AccumuloVFSClassLoader;
+
+public class SamplerFactory {
+  public static Sampler newSampler(SamplerConfigurationImpl config, AccumuloConfiguration acuconf) throws IOException {
+    String context = acuconf.get(Property.TABLE_CLASSPATH);
+
+    Class<? extends Sampler> clazz;
+    try {
+      if (context != null && !context.equals(""))
+        clazz = AccumuloVFSClassLoader.getContextManager().loadClass(context, config.getClassName(), Sampler.class);
+      else
+        clazz = AccumuloVFSClassLoader.loadClass(config.getClassName(), Sampler.class);
+
+      Sampler sampler = clazz.newInstance();
+
+      sampler.init(config.toSamplerConfiguration());
+
+      return sampler;
+
+    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
+      throw new RuntimeException(e);
+    }
+  }
+}
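
Stripped of the per-table classpath context, the factory reduces to the familiar reflective pattern sketched below; here config stands for a SamplerConfigurationImpl already in hand.

    // Simplified sketch of what newSampler does when no TABLE_CLASSPATH
    // context is set: load the class, call the required no-arg constructor,
    // then initialize with the stored configuration.
    Class<? extends Sampler> clazz =
        Class.forName(config.getClassName()).asSubclass(Sampler.class);
    Sampler sampler = clazz.newInstance();
    sampler.init(config.toSamplerConfiguration());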