You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by el...@apache.org on 2013/11/22 21:21:15 UTC

[1/5] git commit: ACCUMULO-1914 make WALog cleanup after recovery an atomic mutation

Updated Branches:
  refs/heads/ACCUMULO-1854-merge 3beb9f710 -> e1dd6f9b2 (forced update)


ACCUMULO-1914 make WALog cleanup after recovery an atomic mutation


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/2640ea9d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/2640ea9d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/2640ea9d

Branch: refs/heads/ACCUMULO-1854-merge
Commit: 2640ea9d926f771274ed10efd23e7400a483f6c0
Parents: 3f633bf
Author: Eric Newton <er...@gmail.com>
Authored: Thu Nov 21 12:19:01 2013 -0500
Committer: Eric Newton <er...@gmail.com>
Committed: Thu Nov 21 12:55:51 2013 -0500

----------------------------------------------------------------------
 .../org/apache/accumulo/server/util/MetadataTable.java  | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/2640ea9d/src/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
----------------------------------------------------------------------
diff --git a/src/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java b/src/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
index 9c4eebf..a9b72e0 100644
--- a/src/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
+++ b/src/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
@@ -991,8 +991,8 @@ public class MetadataTable extends org.apache.accumulo.core.util.MetadataTable {
   }
   
   public static void removeUnusedWALEntries(KeyExtent extent, List<LogEntry> logEntries, ZooLock zooLock) {
-    for (LogEntry entry : logEntries) {
-      if (entry.extent.equals(Constants.ROOT_TABLET_EXTENT)) {
+      if (extent.equals(Constants.ROOT_TABLET_EXTENT)) {
+        for (LogEntry entry : logEntries) {
         String root = getZookeeperLogLocation();
         while (true) {
           try {
@@ -1005,11 +1005,13 @@ public class MetadataTable extends org.apache.accumulo.core.util.MetadataTable {
           }
           UtilWaitThread.sleep(1000);
         }
+        }
       } else {
-        Mutation m = new Mutation(entry.extent.getMetadataEntry());
-        m.putDelete(Constants.METADATA_LOG_COLUMN_FAMILY, new Text(entry.server + "/" + entry.filename));
+        Mutation m = new Mutation(extent.getMetadataEntry());
+        for (LogEntry entry : logEntries) {
+          m.putDelete(Constants.METADATA_LOG_COLUMN_FAMILY, new Text(entry.server + "/" + entry.filename));
+        }
         update(SecurityConstants.getSystemCredentials(), zooLock, m);
-      }
     }
   }
   


[4/5] git commit: ACCUMULO-1854 Remove unnecessary arguments from methods

Posted by el...@apache.org.
ACCUMULO-1854 Remove unnecessary arguments from methods


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/8dd3ae4b
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/8dd3ae4b
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/8dd3ae4b

Branch: refs/heads/ACCUMULO-1854-merge
Commit: 8dd3ae4b1d614182f26d382d3d16956726d26702
Parents: 7311481
Author: Josh Elser <el...@apache.org>
Authored: Thu Nov 21 20:18:16 2013 -0500
Committer: Josh Elser <el...@apache.org>
Committed: Fri Nov 22 15:17:58 2013 -0500

----------------------------------------------------------------------
 .../core/client/mapreduce/InputFormatBase.java  | 34 ++++++--------------
 1 file changed, 10 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/8dd3ae4b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
index 40e09a1..bd90b8c 100644
--- a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
+++ b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
@@ -1057,25 +1057,18 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
       }
     }
 
-    // Apply the configured iterators from the job to the scanner
-    /**
-     * @deprecated Use {@link #setupIterators(Configuration,Scanner)} instead
-     */
-    protected void setupIterators(TaskAttemptContext attempt, Scanner scanner, List<AccumuloIterator> iterators, List<AccumuloIteratorOption> options)
-        throws AccumuloException {
-      setupIterators(attempt.getConfiguration(), scanner, iterators, options);
-    }
-
     /**
      * Apply the configured iterators from the configuration to the scanner.
      * 
-     * @param conf
-     *          the Hadoop configuration object
      * @param scanner
      *          the scanner to configure
+     * @param iterators
+     *          the iterators to configure on the scanner
+     * @param options
+     *          options for each configured iterator
      * @throws AccumuloException
      */
-    protected void setupIterators(Configuration conf, Scanner scanner, List<AccumuloIterator> iterators, List<AccumuloIteratorOption> options)
+    protected void setupIterators(Scanner scanner, List<AccumuloIterator> iterators, List<AccumuloIteratorOption> options)
         throws AccumuloException {
 
       Map<String,IteratorSetting> scanIterators = new HashMap<String,IteratorSetting>();
@@ -1091,21 +1084,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     }
 
     /**
-     * @deprecated Use {@link #setupMaxVersions(Configuration,Scanner)} instead
-     */
-    protected void setupMaxVersions(TaskAttemptContext attempt, Scanner scanner, int maxVersions) {
-      setupMaxVersions(attempt.getConfiguration(), scanner, maxVersions);
-    }
-
-    /**
      * If maxVersions has been set, configure a {@link VersioningIterator} at priority 0 for this scanner.
      * 
-     * @param conf
-     *          the Hadoop configuration object
      * @param scanner
      *          the scanner to configure
+     * @param maxVersions
+     *          the number of versions to return
      */
-    protected void setupMaxVersions(Configuration conf, Scanner scanner, int maxVersions) {
+    protected void setupMaxVersions(Scanner scanner, int maxVersions) {
       // Check to make sure its a legit value
       if (maxVersions >= 1) {
         IteratorSetting vers = new IteratorSetting(0, "vers", VersioningIterator.class);
@@ -1222,13 +1208,13 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
           log.info("Using local iterators");
           scanner = new ClientSideIteratorScanner(scanner);
         }
-        setupMaxVersions(conf, scanner, maxVersions);
+        setupMaxVersions(scanner, maxVersions);
         if (rowRegex != null || colfRegex != null || colqRegex != null || valueRegex != null) {
           IteratorSetting is = new IteratorSetting(50, RegExFilter.class);
           RegExFilter.setRegexs(is, rowRegex, colfRegex, colqRegex, valueRegex, false);
           scanner.addScanIterator(is);
         }
-        setupIterators(conf, scanner, iterators, options);
+        setupIterators(scanner, iterators, options);
       } catch (Exception e) {
         throw new IOException(e);
       }


[5/5] git commit: ACCUMULO-1854 Remove todo and finish toString

Posted by el...@apache.org.
ACCUMULO-1854 Remove todo and finish toString


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/e1dd6f9b
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/e1dd6f9b
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/e1dd6f9b

Branch: refs/heads/ACCUMULO-1854-merge
Commit: e1dd6f9b28fa579eada4334b7072fca4155add15
Parents: 8dd3ae4
Author: Josh Elser <el...@apache.org>
Authored: Fri Nov 22 15:14:23 2013 -0500
Committer: Josh Elser <el...@apache.org>
Committed: Fri Nov 22 15:17:58 2013 -0500

----------------------------------------------------------------------
 .../core/client/mapreduce/RangeInputSplit.java   | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/e1dd6f9b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
----------------------------------------------------------------------
diff --git a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
index e372801..69f2b38 100644
--- a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
+++ b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
@@ -311,7 +311,24 @@ public class RangeInputSplit extends InputSplit implements Writable {
     sb.append("Range: ").append(range);
     sb.append(" Locations: ").append(locations);
     sb.append(" Table: ").append(table);
-    // TODO finish building of string
+    sb.append(" InstanceName: ").append(instanceName);
+    sb.append(" zooKeepers: ").append(zooKeepers);
+    sb.append(" username: ").append(username);
+    sb.append(" password: ").append(new String(password));
+    sb.append(" Authorizations: ").append(auths);
+    sb.append(" offlineScan: ").append(offline);
+    sb.append(" mockInstance: ").append(mockInstance);
+    sb.append(" isolatedScan: ").append(isolatedScan);
+    sb.append(" localIterators: ").append(localIterators);
+    sb.append(" maxVersions: ").append(maxVersions);
+    sb.append(" rowRegex: ").append(rowRegex);
+    sb.append(" colfamRegex: ").append(colfamRegex);
+    sb.append(" colqualRegex: ").append(colqualRegex);
+    sb.append(" valueRegex: ").append(valueRegex);
+    sb.append(" fetchColumns: ").append(fetchedColumns);
+    sb.append(" iterators: ").append(iterators);
+    sb.append(" options: ").append(options);
+    sb.append(" logLevel: ").append(level);
     return sb.toString();
   }
 


[3/5] git commit: Squashed commit of the following:

Posted by el...@apache.org.
Squashed commit of the following:

commit dfbe098fb650d1d1605ac28ff0b195e229ecb345
Author: Josh Elser <el...@apache.org>
Date:   Wed Nov 20 23:57:18 2013 -0500

    ACCUMULO-1843 Add in log4j Level to RangeInputSplit. Add more tests, notably ones that exercise delegation of the input
    split to the Configuration.

commit 38fdee9916edd938bea1642de5d4e5cf54a81596
Author: Josh Elser <el...@apache.org>
Date:   Fri Nov 8 17:47:57 2013 -0500

    ACCUMULO-1854 Fix up InputFormatBase to use the information stored on
    RangeInputSplit and fall back onto the Configuration.

commit 0e6d1aba7eacef357e0a17c67a453dd5b50a49dc
Author: Josh Elser <el...@apache.org>
Date:   Fri Nov 8 16:23:49 2013 -0500

    ACCUMULO-1854 Clean up constructors. Add a test.

commit 2f59f81f6e75f8a90ccfe3df00c6ad3f69174e0c
Author: Josh Elser <el...@apache.org>
Date:   Fri Nov 8 15:46:39 2013 -0500

    ACCUMULO-1854 Move RangeInputSplit into its own file and store all
    connection information into it.


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/73114819
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/73114819
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/73114819

Branch: refs/heads/ACCUMULO-1854-merge
Commit: 73114819e85714f56838e2bcf16bf9b5c6c6a397
Parents: 2640ea9
Author: Josh Elser <el...@apache.org>
Authored: Thu Nov 21 00:19:59 2013 -0500
Committer: Josh Elser <el...@apache.org>
Committed: Fri Nov 22 15:17:57 2013 -0500

----------------------------------------------------------------------
 .../client/mapreduce/AccumuloInputFormat.java   |  11 +
 .../core/client/mapreduce/InputFormatBase.java  | 615 ++++++++++---------
 .../core/client/mapreduce/RangeInputSplit.java  | 493 +++++++++++++++
 .../mapreduce/AccumuloInputFormatTest.java      | 253 ++++++--
 .../mapreduce/AccumuloRowInputFormatTest.java   |   1 -
 .../client/mapreduce/RangeInputSplitTest.java   | 100 +++
 .../simple/filedata/ChunkInputFormatTest.java   |   4 +-
 7 files changed, 1126 insertions(+), 351 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/73114819/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
index 4de131f..c9a70eb 100644
--- a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
+++ b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
@@ -25,6 +25,7 @@ import org.apache.accumulo.core.util.format.DefaultFormatter;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.log4j.Level;
 
 /**
  * This class allows MapReduce jobs to use Accumulo as the source of data. This input format provides keys and values of type Key and Value to the Map() and
@@ -44,6 +45,16 @@ public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
   @Override
   public RecordReader<Key,Value> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
     log.setLevel(getLogLevel(context.getConfiguration()));
+    
+    // Override the log level taken from the configuration: if the RangeInputSplit carries one, it is the more correct one to use.
+    if (split instanceof RangeInputSplit) {
+      RangeInputSplit risplit = (RangeInputSplit) split;
+      Level level = risplit.getLogLevel();
+      if (null != level) {
+        log.setLevel(level);
+      }
+    }
+
     return new RecordReaderBase<Key,Value>() {
       @Override
       public boolean nextKeyValue() throws IOException, InterruptedException {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/73114819/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
index 8e238f1..40e09a1 100644
--- a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
+++ b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
@@ -110,7 +110,7 @@ import org.apache.log4j.Logger;
 
 public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static final Logger log = Logger.getLogger(InputFormatBase.class);
-  
+
   private static final String PREFIX = AccumuloInputFormat.class.getSimpleName();
   private static final String INPUT_INFO_HAS_BEEN_SET = PREFIX + ".configured";
   private static final String INSTANCE_HAS_BEEN_SET = PREFIX + ".instanceConfigured";
@@ -118,34 +118,34 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   private static final String PASSWORD = PREFIX + ".password";
   private static final String TABLE_NAME = PREFIX + ".tablename";
   private static final String AUTHORIZATIONS = PREFIX + ".authorizations";
-  
+
   private static final String INSTANCE_NAME = PREFIX + ".instanceName";
   private static final String ZOOKEEPERS = PREFIX + ".zooKeepers";
   private static final String MOCK = ".useMockInstance";
-  
+
   private static final String RANGES = PREFIX + ".ranges";
   private static final String AUTO_ADJUST_RANGES = PREFIX + ".ranges.autoAdjust";
-  
+
   private static final String ROW_REGEX = PREFIX + ".regex.row";
   private static final String COLUMN_FAMILY_REGEX = PREFIX + ".regex.cf";
   private static final String COLUMN_QUALIFIER_REGEX = PREFIX + ".regex.cq";
   private static final String VALUE_REGEX = PREFIX + ".regex.value";
-  
+
   private static final String COLUMNS = PREFIX + ".columns";
   private static final String LOGLEVEL = PREFIX + ".loglevel";
-  
+
   private static final String ISOLATED = PREFIX + ".isolated";
-  
+
   private static final String LOCAL_ITERATORS = PREFIX + ".localiters";
-  
+
   // Used to specify the maximum # of versions of an Accumulo cell value to return
   private static final String MAX_VERSIONS = PREFIX + ".maxVersions";
-  
+
   // Used for specifying the iterators to be applied
   private static final String ITERATORS = PREFIX + ".iterators";
   private static final String ITERATORS_OPTIONS = PREFIX + ".iterators.options";
   private static final String ITERATORS_DELIM = ",";
-  
+
   private static final String READ_OFFLINE = PREFIX + ".read.offline";
 
   /**
@@ -154,7 +154,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   public static void setIsolated(JobContext job, boolean enable) {
     setIsolated(job.getConfiguration(), enable);
   }
-  
+
   /**
    * Enable or disable use of the {@link IsolatedScanner} in this configuration object. By default it is not enabled.
    * 
@@ -166,14 +166,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   public static void setIsolated(Configuration conf, boolean enable) {
     conf.setBoolean(ISOLATED, enable);
   }
-  
+
   /**
    * @deprecated Use {@link #setLocalIterators(Configuration,boolean)} instead
    */
   public static void setLocalIterators(JobContext job, boolean enable) {
     setLocalIterators(job.getConfiguration(), enable);
   }
-  
+
   /**
    * Enable or disable use of the {@link ClientSideIteratorScanner} in this Configuration object. By default it is not enabled.
    * 
@@ -185,14 +185,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   public static void setLocalIterators(Configuration conf, boolean enable) {
     conf.setBoolean(LOCAL_ITERATORS, enable);
   }
-  
+
   /**
    * @deprecated Use {@link #setInputInfo(Configuration,String,byte[],String,Authorizations)} instead
    */
   public static void setInputInfo(JobContext job, String user, byte[] passwd, String table, Authorizations auths) {
     setInputInfo(job.getConfiguration(), user, passwd, table, auths);
   }
-  
+
   /**
    * Initialize the user, table, and authorization information for the configuration object that will be used with an Accumulo InputFormat.
    * 
@@ -211,7 +211,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     if (conf.getBoolean(INPUT_INFO_HAS_BEEN_SET, false))
       throw new IllegalStateException("Input info can only be set once per job");
     conf.setBoolean(INPUT_INFO_HAS_BEEN_SET, true);
-    
+
     ArgumentChecker.notNull(user, passwd, table);
     conf.set(USERNAME, user);
     conf.set(PASSWORD, new String(Base64.encodeBase64(passwd)));
@@ -219,14 +219,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     if (auths != null && !auths.isEmpty())
       conf.set(AUTHORIZATIONS, auths.serialize());
   }
-  
+
   /**
    * @deprecated Use {@link #setZooKeeperInstance(Configuration,String,String)} instead
    */
   public static void setZooKeeperInstance(JobContext job, String instanceName, String zooKeepers) {
     setZooKeeperInstance(job.getConfiguration(), instanceName, zooKeepers);
   }
-  
+
   /**
    * Configure a {@link ZooKeeperInstance} for this configuration object.
    * 
@@ -241,19 +241,19 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     if (conf.getBoolean(INSTANCE_HAS_BEEN_SET, false))
       throw new IllegalStateException("Instance info can only be set once per job");
     conf.setBoolean(INSTANCE_HAS_BEEN_SET, true);
-    
+
     ArgumentChecker.notNull(instanceName, zooKeepers);
     conf.set(INSTANCE_NAME, instanceName);
     conf.set(ZOOKEEPERS, zooKeepers);
   }
-  
+
   /**
    * @deprecated Use {@link #setMockInstance(Configuration,String)} instead
    */
   public static void setMockInstance(JobContext job, String instanceName) {
     setMockInstance(job.getConfiguration(), instanceName);
   }
-  
+
   /**
    * Configure a {@link MockInstance} for this configuration object.
    * 
@@ -267,14 +267,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     conf.setBoolean(MOCK, true);
     conf.set(INSTANCE_NAME, instanceName);
   }
-  
+
   /**
    * @deprecated Use {@link #setRanges(Configuration,Collection)} instead
    */
   public static void setRanges(JobContext job, Collection<Range> ranges) {
     setRanges(job.getConfiguration(), ranges);
   }
-  
+
   /**
    * Set the ranges to map over for this configuration object.
    * 
@@ -297,14 +297,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     }
     conf.setStrings(RANGES, rangeStrings.toArray(new String[0]));
   }
-  
+
   /**
    * @deprecated Use {@link #disableAutoAdjustRanges(Configuration)} instead
    */
   public static void disableAutoAdjustRanges(JobContext job) {
     disableAutoAdjustRanges(job.getConfiguration());
   }
-  
+
   /**
    * Disables the adjustment of ranges for this configuration object. By default, overlapping ranges will be merged and ranges will be fit to existing tablet
    * boundaries. Disabling this adjustment will cause there to be exactly one mapper per range set using {@link #setRanges(Configuration, Collection)}.
@@ -315,14 +315,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   public static void disableAutoAdjustRanges(Configuration conf) {
     conf.setBoolean(AUTO_ADJUST_RANGES, false);
   }
-  
+
   /**
    * @deprecated since 1.4 use {@link org.apache.accumulo.core.iterators.user.RegExFilter} and {@link #addIterator(Configuration, IteratorSetting)}
    */
   public static enum RegexType {
     ROW, COLUMN_FAMILY, COLUMN_QUALIFIER, VALUE
   }
-  
+
   /**
    * @deprecated since 1.4 use {@link #addIterator(Configuration, IteratorSetting)}
    * @see org.apache.accumulo.core.iterators.user.RegExFilter#setRegexs(IteratorSetting, String, String, String, String, boolean)
@@ -356,14 +356,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
       throw new RuntimeException(e);
     }
   }
-  
+
   /**
    * @deprecated Use {@link #setMaxVersions(Configuration,int)} instead
    */
   public static void setMaxVersions(JobContext job, int maxVersions) throws IOException {
     setMaxVersions(job.getConfiguration(), maxVersions);
   }
-  
+
   /**
    * Sets the max # of values that may be returned for an individual Accumulo cell. By default, applied before all other Accumulo iterators (highest priority)
    * leveraged in the scan by the record reader. To adjust priority use setIterator() & setIteratorOptions() w/ the VersioningIterator type explicitly.
@@ -380,7 +380,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
       throw new IOException("Invalid maxVersions: " + maxVersions + ".  Must be >= 1");
     conf.setInt(MAX_VERSIONS, maxVersions);
   }
-  
+
   /**
    * <p>
    * Enable reading offline tables. This will make the map reduce job directly read the tables files. If the table is not offline, then the job will fail. If
@@ -407,18 +407,18 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
    * @param scanOff
    *          pass true to read offline tables
    */
-  
+
   public static void setScanOffline(Configuration conf, boolean scanOff) {
     conf.setBoolean(READ_OFFLINE, scanOff);
   }
-  
+
   /**
    * @deprecated Use {@link #fetchColumns(Configuration,Collection)} instead
    */
   public static void fetchColumns(JobContext job, Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
     fetchColumns(job.getConfiguration(), columnFamilyColumnQualifierPairs);
   }
-  
+
   /**
    * Restricts the columns that will be mapped over for this configuration object.
    * 
@@ -429,27 +429,33 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
    *          selected. An empty set is the default and is equivalent to scanning the all columns.
    */
   public static void fetchColumns(Configuration conf, Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
+    String[] columnStrings = serializeColumns(columnFamilyColumnQualifierPairs);
+    conf.setStrings(COLUMNS, columnStrings);
+  }
+
+  public static String[] serializeColumns(Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
     ArgumentChecker.notNull(columnFamilyColumnQualifierPairs);
     ArrayList<String> columnStrings = new ArrayList<String>(columnFamilyColumnQualifierPairs.size());
     for (Pair<Text,Text> column : columnFamilyColumnQualifierPairs) {
       if (column.getFirst() == null)
         throw new IllegalArgumentException("Column family can not be null");
-      
+
       String col = new String(Base64.encodeBase64(TextUtil.getBytes(column.getFirst())));
       if (column.getSecond() != null)
         col += ":" + new String(Base64.encodeBase64(TextUtil.getBytes(column.getSecond())));
       columnStrings.add(col);
     }
-    conf.setStrings(COLUMNS, columnStrings.toArray(new String[0]));
+
+    return columnStrings.toArray(new String[0]);
   }
-  
+
   /**
    * @deprecated Use {@link #setLogLevel(Configuration,Level)} instead
    */
   public static void setLogLevel(JobContext job, Level level) {
     setLogLevel(job.getConfiguration(), level);
   }
-  
+
   /**
    * Sets the log level for this configuration object.
    * 
@@ -463,14 +469,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     log.setLevel(level);
     conf.setInt(LOGLEVEL, level.toInt());
   }
-  
+
   /**
    * @deprecated Use {@link #addIterator(Configuration,IteratorSetting)} instead
    */
   public static void addIterator(JobContext job, IteratorSetting cfg) {
     addIterator(job.getConfiguration(), cfg);
   }
-  
+
   /**
    * Encode an iterator on the input for this configuration object.
    * 
@@ -482,7 +488,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   public static void addIterator(Configuration conf, IteratorSetting cfg) {
     // First check to see if anything has been set already
     String iterators = conf.get(ITERATORS);
-    
+
     // No iterators specified yet, create a new string
     if (iterators == null || iterators.isEmpty()) {
       iterators = new AccumuloIterator(cfg.getPriority(), cfg.getIteratorClass(), cfg.getName()).toString();
@@ -495,9 +501,9 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     for (Entry<String,String> entry : cfg.getOptions().entrySet()) {
       if (entry.getValue() == null)
         continue;
-      
+
       String iteratorOptions = conf.get(ITERATORS_OPTIONS);
-      
+
       // No options specified yet, create a new string
       if (iteratorOptions == null || iteratorOptions.isEmpty()) {
         iteratorOptions = new AccumuloIteratorOption(cfg.getName(), entry.getKey(), entry.getValue()).toString();
@@ -505,12 +511,12 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         // append the next option & reset
         iteratorOptions = iteratorOptions.concat(ITERATORS_DELIM + new AccumuloIteratorOption(cfg.getName(), entry.getKey(), entry.getValue()));
       }
-      
+
       // Store the options w/ the job
       conf.set(ITERATORS_OPTIONS, iteratorOptions);
     }
   }
-  
+
   /**
    * Specify an Accumulo iterator type to manage the behavior of the underlying table scan this InputFormat's RecordReader will conduct, w/ priority dictating
    * the order in which specified iterators are applied. Repeat calls to specify multiple iterators are allowed.
@@ -529,7 +535,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   public static void setIterator(JobContext job, int priority, String iteratorClass, String iteratorName) {
     // First check to see if anything has been set already
     String iterators = job.getConfiguration().get(ITERATORS);
-    
+
     // No iterators specified yet, create a new string
     if (iterators == null || iterators.isEmpty()) {
       iterators = new AccumuloIterator(priority, iteratorClass, iteratorName).toString();
@@ -539,9 +545,9 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     }
     // Store the iterators w/ the job
     job.getConfiguration().set(ITERATORS, iterators);
-    
+
   }
-  
+
   /**
    * Specify an option for a named Accumulo iterator, further specifying that iterator's behavior.
    * 
@@ -559,9 +565,9 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   public static void setIteratorOption(JobContext job, String iteratorName, String key, String value) {
     if (iteratorName == null || key == null || value == null)
       return;
-    
+
     String iteratorOptions = job.getConfiguration().get(ITERATORS_OPTIONS);
-    
+
     // No options specified yet, create a new string
     if (iteratorOptions == null || iteratorOptions.isEmpty()) {
       iteratorOptions = new AccumuloIteratorOption(iteratorName, key, value).toString();
@@ -569,18 +575,18 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
       // append the next option & reset
       iteratorOptions = iteratorOptions.concat(ITERATORS_DELIM + new AccumuloIteratorOption(iteratorName, key, value));
     }
-    
+
     // Store the options w/ the job
     job.getConfiguration().set(ITERATORS_OPTIONS, iteratorOptions);
   }
-  
+
   /**
    * @deprecated Use {@link #isIsolated(Configuration)} instead
    */
   protected static boolean isIsolated(JobContext job) {
     return isIsolated(job.getConfiguration());
   }
-  
+
   /**
    * Determines whether a configuration has isolation enabled.
    * 
@@ -592,14 +598,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static boolean isIsolated(Configuration conf) {
     return conf.getBoolean(ISOLATED, false);
   }
-  
+
   /**
    * @deprecated Use {@link #usesLocalIterators(Configuration)} instead
    */
   protected static boolean usesLocalIterators(JobContext job) {
     return usesLocalIterators(job.getConfiguration());
   }
-  
+
   /**
    * Determines whether a configuration uses local iterators.
    * 
@@ -611,14 +617,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static boolean usesLocalIterators(Configuration conf) {
     return conf.getBoolean(LOCAL_ITERATORS, false);
   }
-  
+
   /**
    * @deprecated Use {@link #getUsername(Configuration)} instead
    */
   protected static String getUsername(JobContext job) {
     return getUsername(job.getConfiguration());
   }
-  
+
   /**
    * Gets the user name from the configuration.
    * 
@@ -630,7 +636,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static String getUsername(Configuration conf) {
     return conf.get(USERNAME);
   }
-  
+
   /**
    * WARNING: The password is stored in the Configuration and shared with all MapReduce tasks; It is BASE64 encoded to provide a charset safe conversion to a
    * string, and is not intended to be secure.
@@ -640,7 +646,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static byte[] getPassword(JobContext job) {
     return getPassword(job.getConfiguration());
   }
-  
+
   /**
    * Gets the password from the configuration. WARNING: The password is stored in the Configuration and shared with all MapReduce tasks; It is BASE64 encoded to
    * provide a charset safe conversion to a string, and is not intended to be secure.
@@ -653,14 +659,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static byte[] getPassword(Configuration conf) {
     return Base64.decodeBase64(conf.get(PASSWORD, "").getBytes());
   }
-  
+
   /**
    * @deprecated Use {@link #getTablename(Configuration)} instead
    */
   protected static String getTablename(JobContext job) {
     return getTablename(job.getConfiguration());
   }
-  
+
   /**
    * Gets the table name from the configuration.
    * 
@@ -672,14 +678,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static String getTablename(Configuration conf) {
     return conf.get(TABLE_NAME);
   }
-  
+
   /**
    * @deprecated Use {@link #getAuthorizations(Configuration)} instead
    */
   protected static Authorizations getAuthorizations(JobContext job) {
     return getAuthorizations(job.getConfiguration());
   }
-  
+
   /**
    * Gets the authorizations to set for the scans from the configuration.
    * 
@@ -692,14 +698,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     String authString = conf.get(AUTHORIZATIONS);
     return authString == null ? Constants.NO_AUTHS : new Authorizations(authString.split(","));
   }
-  
+
   /**
    * @deprecated Use {@link #getInstance(Configuration)} instead
    */
   protected static Instance getInstance(JobContext job) {
     return getInstance(job.getConfiguration());
   }
-  
+
   /**
    * Initializes an Accumulo {@link Instance} based on the configuration.
    * 
@@ -714,14 +720,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
       return new MockInstance(conf.get(INSTANCE_NAME));
     return new ZooKeeperInstance(conf.get(INSTANCE_NAME), conf.get(ZOOKEEPERS));
   }
-  
+
   /**
    * @deprecated Use {@link #getTabletLocator(Configuration)} instead
    */
   protected static TabletLocator getTabletLocator(JobContext job) throws TableNotFoundException {
     return getTabletLocator(job.getConfiguration());
   }
-  
+
   /**
    * Initializes an Accumulo {@link TabletLocator} based on the configuration.
    * 
@@ -741,14 +747,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     return TabletLocator.getInstance(instance, new AuthInfo(username, ByteBuffer.wrap(password), instance.getInstanceID()),
         new Text(Tables.getTableId(instance, tableName)));
   }
-  
+
   /**
    * @deprecated Use {@link #getRanges(Configuration)} instead
    */
   protected static List<Range> getRanges(JobContext job) throws IOException {
     return getRanges(job.getConfiguration());
   }
-  
+
   /**
    * Gets the ranges to scan over from a configuration object.
    * 
@@ -769,7 +775,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     }
     return ranges;
   }
-  
+
   /**
    * @deprecated since 1.4 use {@link org.apache.accumulo.core.iterators.user.RegExFilter} and {@link #addIterator(Configuration, IteratorSetting)}
    * @see #setRegex(JobContext, RegexType, String)
@@ -802,14 +808,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
       throw new RuntimeException(e);
     }
   }
-  
+
   /**
    * @deprecated Use {@link #getFetchedColumns(Configuration)} instead
    */
   protected static Set<Pair<Text,Text>> getFetchedColumns(JobContext job) {
     return getFetchedColumns(job.getConfiguration());
   }
-  
+
   /**
    * Gets the columns to be mapped over from this configuration object.
    * 
@@ -819,8 +825,19 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
    * @see #fetchColumns(Configuration, Collection)
    */
   protected static Set<Pair<Text,Text>> getFetchedColumns(Configuration conf) {
+    ArgumentChecker.notNull(conf);
+
+    return deserializeFetchedColumns(conf.getStrings(COLUMNS));
+  }
+
+  public static Set<Pair<Text,Text>> deserializeFetchedColumns(String[] serialized) {
     Set<Pair<Text,Text>> columns = new HashSet<Pair<Text,Text>>();
-    for (String col : conf.getStringCollection(COLUMNS)) {
+
+    if (null == serialized) {
+      return columns;
+    }
+
+    for (String col : serialized) {
       int idx = col.indexOf(":");
       Text cf = new Text(idx < 0 ? Base64.decodeBase64(col.getBytes()) : Base64.decodeBase64(col.substring(0, idx).getBytes()));
       Text cq = idx < 0 ? null : new Text(Base64.decodeBase64(col.substring(idx + 1).getBytes()));
@@ -828,14 +845,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     }
     return columns;
   }
-  
+
   /**
    * @deprecated Use {@link #getAutoAdjustRanges(Configuration)} instead
    */
   protected static boolean getAutoAdjustRanges(JobContext job) {
     return getAutoAdjustRanges(job.getConfiguration());
   }
-  
+
   /**
    * Determines whether a configuration has auto-adjust ranges enabled.
    * 
@@ -847,14 +864,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static boolean getAutoAdjustRanges(Configuration conf) {
     return conf.getBoolean(AUTO_ADJUST_RANGES, true);
   }
-  
+
   /**
    * @deprecated Use {@link #getLogLevel(Configuration)} instead
    */
   protected static Level getLogLevel(JobContext job) {
     return getLogLevel(job.getConfiguration());
   }
-  
+
   /**
    * Gets the log level from this configuration.
    * 
@@ -866,7 +883,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static Level getLogLevel(Configuration conf) {
     return Level.toLevel(conf.getInt(LOGLEVEL, Level.INFO.toInt()));
   }
-  
+
   // InputFormat doesn't have the equivalent of OutputFormat's
   // checkOutputSpecs(JobContext job)
   /**
@@ -875,7 +892,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static void validateOptions(JobContext job) throws IOException {
     validateOptions(job.getConfiguration());
   }
-  
+
   // InputFormat doesn't have the equivalent of OutputFormat's
   // checkOutputSpecs(JobContext job)
   /**
@@ -898,7 +915,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         throw new IOException("Unable to authenticate user");
       if (!c.securityOperations().hasTablePermission(getUsername(conf), getTablename(conf), TablePermission.READ))
         throw new IOException("Unable to access table");
-      
+
       if (!usesLocalIterators(conf)) {
-        // validate that any scan-time iterators can be loaded by the the tablet servers
+        // validate that any scan-time iterators can be loaded by the tablet servers
         for (AccumuloIterator iter : getIterators(conf)) {
@@ -906,21 +923,21 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
             throw new AccumuloException("Servers are unable to load " + iter.getIteratorClass() + " as a " + SortedKeyValueIterator.class.getName());
         }
       }
-      
+
     } catch (AccumuloException e) {
       throw new IOException(e);
     } catch (AccumuloSecurityException e) {
       throw new IOException(e);
     }
   }
-  
+
   /**
    * @deprecated Use {@link #getMaxVersions(Configuration)} instead
    */
   protected static int getMaxVersions(JobContext job) {
     return getMaxVersions(job.getConfiguration());
   }
-  
+
   /**
    * Gets the maxVersions to use for the {@link VersioningIterator} from this configuration.
    * 
@@ -932,7 +949,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static int getMaxVersions(Configuration conf) {
     return conf.getInt(MAX_VERSIONS, -1);
   }
-  
+
   protected static boolean isOfflineScan(Configuration conf) {
     return conf.getBoolean(READ_OFFLINE, false);
   }
@@ -945,7 +962,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
   protected static List<AccumuloIterator> getIterators(JobContext job) {
     return getIterators(job.getConfiguration());
   }
-  
+
   /**
    * Gets a list of the iterator settings (for iterators to apply to a scanner) from this configuration.
    * 
@@ -955,13 +972,13 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
    * @see #addIterator(Configuration, IteratorSetting)
    */
   protected static List<AccumuloIterator> getIterators(Configuration conf) {
-    
+
     String iterators = conf.get(ITERATORS);
-    
+
     // If no iterators are present, return an empty list
     if (iterators == null || iterators.isEmpty())
       return new ArrayList<AccumuloIterator>();
-    
+
     // Compose the set of iterators encoded in the job configuration
     StringTokenizer tokens = new StringTokenizer(conf.get(ITERATORS), ITERATORS_DELIM);
     List<AccumuloIterator> list = new ArrayList<AccumuloIterator>();
@@ -971,14 +988,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     }
     return list;
   }
-  
+
   /**
    * @deprecated Use {@link #getIteratorOptions(Configuration)} instead
    */
   protected static List<AccumuloIteratorOption> getIteratorOptions(JobContext job) {
     return getIteratorOptions(job.getConfiguration());
   }
-  
+
   /**
    * Gets a list of the iterator options specified on this configuration.
    * 
@@ -989,11 +1006,11 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
    */
   protected static List<AccumuloIteratorOption> getIteratorOptions(Configuration conf) {
     String iteratorOptions = conf.get(ITERATORS_OPTIONS);
-    
+
     // If no options are present, return an empty list
     if (iteratorOptions == null || iteratorOptions.isEmpty())
       return new ArrayList<AccumuloIteratorOption>();
-    
+
     // Compose the set of options encoded in the job configuration
     StringTokenizer tokens = new StringTokenizer(conf.get(ITERATORS_OPTIONS), ITERATORS_DELIM);
     List<AccumuloIteratorOption> list = new ArrayList<AccumuloIteratorOption>();
@@ -1003,13 +1020,13 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     }
     return list;
   }
-  
+
   protected abstract static class RecordReaderBase<K,V> extends RecordReader<K,V> {
     protected long numKeysRead;
     protected Iterator<Entry<Key,Value>> scannerIterator;
     private boolean scannerRegexEnabled = false;
     protected RangeInputSplit split;
-    
+
     /**
      * @deprecated since 1.4, configure {@link org.apache.accumulo.core.iterators.user.RegExFilter} instead.
      */
@@ -1024,7 +1041,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         log.info("Setting " + methodName + " to " + regex);
       }
     }
-    
+
     /**
      * @deprecated since 1.4, configure {@link org.apache.accumulo.core.iterators.user.RegExFilter} instead.
      */
@@ -1039,15 +1056,16 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         throw new AccumuloException("Can't set up regex for scanner");
       }
     }
-    
+
     // Apply the configured iterators from the job to the scanner
     /**
-     * @deprecated Use {@link #setupIterators(Configuration,Scanner)} instead
+     * @deprecated Use {@link #setupIterators(Configuration,Scanner,List,List)} instead
      */
-    protected void setupIterators(TaskAttemptContext attempt, Scanner scanner) throws AccumuloException {
-      setupIterators(attempt.getConfiguration(), scanner);
+    protected void setupIterators(TaskAttemptContext attempt, Scanner scanner, List<AccumuloIterator> iterators, List<AccumuloIteratorOption> options)
+        throws AccumuloException {
+      setupIterators(attempt.getConfiguration(), scanner, iterators, options);
     }
-    
+
     /**
      * Apply the configured iterators from the configuration to the scanner.
      * 
@@ -1057,10 +1075,9 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
      *          the scanner to configure
      * @throws AccumuloException
      */
-    protected void setupIterators(Configuration conf, Scanner scanner) throws AccumuloException {
-      List<AccumuloIterator> iterators = getIterators(conf);
-      List<AccumuloIteratorOption> options = getIteratorOptions(conf);
-      
+    protected void setupIterators(Configuration conf, Scanner scanner, List<AccumuloIterator> iterators, List<AccumuloIteratorOption> options)
+        throws AccumuloException {
+
       Map<String,IteratorSetting> scanIterators = new HashMap<String,IteratorSetting>();
       for (AccumuloIterator iterator : iterators) {
         scanIterators.put(iterator.getIteratorName(), new IteratorSetting(iterator.getPriority(), iterator.getIteratorName(), iterator.getIteratorClass()));
@@ -1072,14 +1089,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         scanner.addScanIterator(scanIterators.get(iterator.getIteratorName()));
       }
     }
-    
+
     /**
-     * @deprecated Use {@link #setupMaxVersions(Configuration,Scanner)} instead
+     * @deprecated Use {@link #setupMaxVersions(Configuration,Scanner,int)} instead
      */
-    protected void setupMaxVersions(TaskAttemptContext attempt, Scanner scanner) {
-      setupMaxVersions(attempt.getConfiguration(), scanner);
+    protected void setupMaxVersions(TaskAttemptContext attempt, Scanner scanner, int maxVersions) {
+      setupMaxVersions(attempt.getConfiguration(), scanner, maxVersions);
     }
-    
+
     /**
      * If maxVersions has been set, configure a {@link VersioningIterator} at priority 0 for this scanner.
      * 
@@ -1088,8 +1105,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
      * @param scanner
      *          the scanner to configure
      */
-    protected void setupMaxVersions(Configuration conf, Scanner scanner) {
-      int maxVersions = getMaxVersions(conf);
+    protected void setupMaxVersions(Configuration conf, Scanner scanner, int maxVersions) {
       // Check to make sure its a legit value
       if (maxVersions >= 1) {
         IteratorSetting vers = new IteratorSetting(0, "vers", VersioningIterator.class);
@@ -1097,54 +1113,128 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         scanner.addScanIterator(vers);
       }
     }
-    
+
     /**
      * Initialize a scanner over the given input split using this task attempt configuration.
      */
     public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
       Scanner scanner;
       split = (RangeInputSplit) inSplit;
-      log.debug("Initializing input split: " + split.range);
+      log.debug("Initializing input split: " + split.getRange());
       Configuration conf = attempt.getConfiguration();
-      Instance instance = getInstance(conf);
-      String user = getUsername(conf);
-      byte[] password = getPassword(conf);
-      Authorizations authorizations = getAuthorizations(conf);
+
+      Instance instance = split.getInstance();
+      if (null == instance) {
+        instance = getInstance(conf);
+      }
+
+      String user = split.getUsername();
+      if (null == user) {
+        user = getUsername(conf);
+      }
+
+      byte[] password = split.getPassword();
+      if (null == password) {
+        password = getPassword(conf);
+      }
+
+      Authorizations authorizations = split.getAuths();
+      if (null == authorizations) {
+        authorizations = getAuthorizations(conf);
+      }
+
+      String table = split.getTable();
+      if (null == table) {
+        table = getTablename(conf);
+      }
+      
+      Boolean isOffline = split.isOffline();
+      if (null == isOffline) {
+        isOffline = isOfflineScan(conf);
+      }
+
+      Boolean isIsolated = split.isIsolatedScan();
+      if (null == isIsolated) {
+        isIsolated = isIsolated(conf);
+      }
+
+      Boolean usesLocalIterators = split.usesLocalIterators();
+      if (null == usesLocalIterators) {
+        usesLocalIterators = usesLocalIterators(conf);
+      }
+
+      String rowRegex = split.getRowRegex();
+      if (null == rowRegex) {
+        rowRegex = conf.get(ROW_REGEX);
+      }
+
+      String colfRegex = split.getColfamRegex();
+      if (null == colfRegex) {
+        colfRegex = conf.get(COLUMN_FAMILY_REGEX);
+      }
+
+      String colqRegex = split.getColqualRegex();
+      if (null == colqRegex) {
+        colqRegex = conf.get(COLUMN_QUALIFIER_REGEX);
+      }
+
+      String valueRegex = split.getValueRegex();
+      if (null == valueRegex) {
+        valueRegex = conf.get(VALUE_REGEX);
+      }
+
+      Integer maxVersions = split.getMaxVersions();
+      if (null == maxVersions) {
+        maxVersions = getMaxVersions(conf);
+      }
+      
+      List<AccumuloIterator> iterators = split.getIterators();
+      if (null == iterators) {
+        iterators = getIterators(conf);
+      }
+      
+      List<AccumuloIteratorOption> options = split.getOptions();
+      if (null == options) {
+        options = getIteratorOptions(conf);
+      }
       
+      Set<Pair<Text,Text>> columns = split.getFetchedColumns();
+      if (null == columns) {
+        columns = getFetchedColumns(conf);
+      }
+
       try {
         log.debug("Creating connector with user: " + user);
         Connector conn = instance.getConnector(user, password);
-        log.debug("Creating scanner for table: " + getTablename(conf));
+        log.debug("Creating scanner for table: " + table);
         log.debug("Authorizations are: " + authorizations);
-        if (isOfflineScan(conf)) {
-          scanner = new OfflineScanner(instance, new AuthInfo(user, ByteBuffer.wrap(password), instance.getInstanceID()), Tables.getTableId(instance,
-              getTablename(conf)), authorizations);
+        if (isOffline) {
+          scanner = new OfflineScanner(instance, new AuthInfo(user, ByteBuffer.wrap(password), instance.getInstanceID()), Tables.getTableId(instance, table),
+              authorizations);
         } else {
-          scanner = conn.createScanner(getTablename(conf), authorizations);
+          scanner = conn.createScanner(table, authorizations);
         }
-        if (isIsolated(conf)) {
+        if (isIsolated) {
           log.info("Creating isolated scanner");
           scanner = new IsolatedScanner(scanner);
         }
-        if (usesLocalIterators(conf)) {
+        if (usesLocalIterators) {
           log.info("Using local iterators");
           scanner = new ClientSideIteratorScanner(scanner);
         }
-        setupMaxVersions(conf, scanner);
-        if (conf.get(ROW_REGEX) != null || conf.get(COLUMN_FAMILY_REGEX) != null || conf.get(COLUMN_QUALIFIER_REGEX) != null ||
-            conf.get(VALUE_REGEX) != null) {
+        setupMaxVersions(conf, scanner, maxVersions);
+        if (rowRegex != null || colfRegex != null || colqRegex != null || valueRegex != null) {
           IteratorSetting is = new IteratorSetting(50, RegExFilter.class);
-          RegExFilter.setRegexs(is, conf.get(ROW_REGEX), conf.get(COLUMN_FAMILY_REGEX), conf.get(COLUMN_QUALIFIER_REGEX),
-            conf.get(VALUE_REGEX), false);
+          RegExFilter.setRegexs(is, rowRegex, colfRegex, colqRegex, valueRegex, false);
           scanner.addScanIterator(is);
         }
-        setupIterators(conf, scanner);
+        setupIterators(conf, scanner, iterators, options);
       } catch (Exception e) {
         throw new IOException(e);
       }
-      
+
       // setup a scanner within the bounds of this split
-      for (Pair<Text,Text> c : getFetchedColumns(conf)) {
+      for (Pair<Text,Text> c : columns) {
         if (c.getSecond() != null) {
           log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
           scanner.fetchColumn(c.getFirst(), c.getSecond());
@@ -1153,48 +1243,48 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
           scanner.fetchColumnFamily(c.getFirst());
         }
       }
-      
-      scanner.setRange(split.range);
-      
+
+      scanner.setRange(split.getRange());
+
       numKeysRead = 0;
-      
+
       // do this last after setting all scanner options
       scannerIterator = scanner.iterator();
     }
-    
+
     public void close() {}
-    
+
     public float getProgress() throws IOException {
       if (numKeysRead > 0 && currentKey == null)
         return 1.0f;
       return split.getProgress(currentKey);
     }
-    
+
     protected K currentK = null;
     protected V currentV = null;
     protected Key currentKey = null;
     protected Value currentValue = null;
-    
+
     @Override
     public K getCurrentKey() throws IOException, InterruptedException {
       return currentK;
     }
-    
+
     @Override
     public V getCurrentValue() throws IOException, InterruptedException {
       return currentV;
     }
   }
-  
+
   Map<String,Map<KeyExtent,List<Range>>> binOfflineTable(JobContext job, String tableName, List<Range> ranges) throws TableNotFoundException,
       AccumuloException, AccumuloSecurityException {
-    
+
     Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<String,Map<KeyExtent,List<Range>>>();
 
     Instance instance = getInstance(job.getConfiguration());
     Connector conn = instance.getConnector(getUsername(job.getConfiguration()), getPassword(job.getConfiguration()));
     String tableId = Tables.getTableId(instance, tableName);
-    
+
     if (Tables.getTableState(instance, tableId) != TableState.OFFLINE) {
       Tables.clearCache(instance);
       if (Tables.getTableState(instance, tableId) != TableState.OFFLINE) {
@@ -1204,12 +1294,12 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
 
     for (Range range : ranges) {
       Text startRow;
-      
+
       if (range.getStartKey() != null)
         startRow = range.getStartKey().getRow();
       else
         startRow = new Text();
-      
+
       Range metadataRange = new Range(new KeyExtent(new Text(tableId), startRow, null).getMetadataEntry(), true, null, false);
       Scanner scanner = conn.createScanner(Constants.METADATA_TABLE_NAME, Constants.NO_AUTHS);
       ColumnFQ.fetch(scanner, Constants.METADATA_PREV_ROW_COLUMN);
@@ -1217,9 +1307,9 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
       scanner.fetchColumnFamily(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY);
       scanner.fetchColumnFamily(Constants.METADATA_FUTURE_LOCATION_COLUMN_FAMILY);
       scanner.setRange(metadataRange);
-      
+
       RowIterator rowIter = new RowIterator(scanner);
-      
+
       // TODO check that extents match prev extent
 
       KeyExtent lastExtent = null;
@@ -1229,15 +1319,15 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         String last = "";
         KeyExtent extent = null;
         String location = null;
-        
+
         while (row.hasNext()) {
           Entry<Key,Value> entry = row.next();
           Key key = entry.getKey();
-          
+
           if (key.getColumnFamily().equals(Constants.METADATA_LAST_LOCATION_COLUMN_FAMILY)) {
             last = entry.getValue().toString();
           }
-          
+
           if (key.getColumnFamily().equals(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY)
               || key.getColumnFamily().equals(Constants.METADATA_FUTURE_LOCATION_COLUMN_FAMILY)) {
             location = entry.getValue().toString();
@@ -1246,9 +1336,9 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
           if (Constants.METADATA_PREV_ROW_COLUMN.hasColumns(key)) {
             extent = new KeyExtent(key.getRow(), entry.getValue());
           }
-          
+
         }
-        
+
         if (location != null)
           return null;
 
@@ -1265,24 +1355,24 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
           tabletRanges = new HashMap<KeyExtent,List<Range>>();
           binnedRanges.put(last, tabletRanges);
         }
-        
+
         List<Range> rangeList = tabletRanges.get(extent);
         if (rangeList == null) {
           rangeList = new ArrayList<Range>();
           tabletRanges.put(extent, rangeList);
         }
-        
+
         rangeList.add(range);
 
         if (extent.getEndRow() == null || range.afterEndKey(new Key(extent.getEndRow()).followingKey(PartialKey.ROW))) {
           break;
         }
-        
+
         lastExtent = extent;
       }
 
     }
-    
+
     return binnedRanges;
   }
 
@@ -1290,18 +1380,35 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
    * Read the metadata table to get tablets and match up ranges to them.
    */
   public List<InputSplit> getSplits(JobContext job) throws IOException {
-    log.setLevel(getLogLevel(job.getConfiguration()));
-    validateOptions(job.getConfiguration());
-    
-    String tableName = getTablename(job.getConfiguration());
-    boolean autoAdjust = getAutoAdjustRanges(job.getConfiguration());
-    List<Range> ranges = autoAdjust ? Range.mergeOverlapping(getRanges(job.getConfiguration())) : getRanges(job.getConfiguration());
-    
+    Configuration conf = job.getConfiguration();
+
+    log.setLevel(getLogLevel(conf));
+    validateOptions(conf);
+
+    String tableName = getTablename(conf);
+    boolean autoAdjust = getAutoAdjustRanges(conf);
+    List<Range> ranges = autoAdjust ? Range.mergeOverlapping(getRanges(conf)) : getRanges(conf);
+    boolean offline = isOfflineScan(conf);
+    boolean isolated = isIsolated(conf);
+    boolean localIterators = usesLocalIterators(conf);
+    boolean mockInstance = conf.getBoolean(MOCK, false);
+    int maxVersions = getMaxVersions(conf);
+    String rowRegex = conf.get(ROW_REGEX), colfamRegex = conf.get(COLUMN_FAMILY_REGEX), colqualRegex = conf.get(COLUMN_QUALIFIER_REGEX), valueRegex = conf
+        .get(VALUE_REGEX);
+    Set<Pair<Text,Text>> fetchedColumns = getFetchedColumns(conf);
+    Authorizations auths = getAuthorizations(conf);
+    byte[] password = getPassword(conf);
+    String username = getUsername(conf);
+    Instance instance = getInstance(conf);
+    List<AccumuloIterator> iterators = getIterators(conf);
+    List<AccumuloIteratorOption> options = getIteratorOptions(conf);
+    Level logLevel = getLogLevel(conf);
+
     if (ranges.isEmpty()) {
       ranges = new ArrayList<Range>(1);
       ranges.add(new Range());
     }
-    
+
     // get the metadata information for these ranges
     Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<String,Map<KeyExtent,List<Range>>>();
     TabletLocator tl;
@@ -1314,7 +1421,6 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
           binnedRanges = binOfflineTable(job, tableName, ranges);
         }
       } else {
-        Instance instance = getInstance(job.getConfiguration());
         String tableId = null;
         tl = getTabletLocator(job.getConfiguration());
         // its possible that the cache could contain complete, but old information about a tables tablets... so clear it
@@ -1337,15 +1443,15 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
     } catch (Exception e) {
       throw new IOException(e);
     }
-    
+
     ArrayList<InputSplit> splits = new ArrayList<InputSplit>(ranges.size());
     HashMap<Range,ArrayList<String>> splitsToAdd = null;
-    
+
     if (!autoAdjust)
       splitsToAdd = new HashMap<Range,ArrayList<String>>();
-    
+
     HashMap<String,String> hostNameCache = new HashMap<String,String>();
-    
+
     for (Entry<String,Map<KeyExtent,List<Range>>> tserverBin : binnedRanges.entrySet()) {
       String ip = tserverBin.getKey().split(":", 2)[0];
       String location = hostNameCache.get(ip);
@@ -1354,14 +1460,14 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         location = inetAddress.getHostName();
         hostNameCache.put(ip, location);
       }
-      
+
       for (Entry<KeyExtent,List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
         Range ke = extentRanges.getKey().toDataRange();
         for (Range r : extentRanges.getValue()) {
           if (autoAdjust) {
             // divide ranges into smaller ranges, based on the
             // tablets
-            splits.add(new RangeInputSplit(tableName, ke.clip(r), new String[] {location}));
+            splits.add(new RangeInputSplit(ke.clip(r), new String[] {location}));
           } else {
             // don't divide ranges
             ArrayList<String> locations = splitsToAdd.get(r);
@@ -1373,132 +1479,55 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         }
       }
     }
-    
+
     if (!autoAdjust)
       for (Entry<Range,ArrayList<String>> entry : splitsToAdd.entrySet())
-        splits.add(new RangeInputSplit(tableName, entry.getKey(), entry.getValue().toArray(new String[0])));
-    return splits;
-  }
-  
-  /**
-   * The Class RangeInputSplit. Encapsulates an Accumulo range for use in Map Reduce jobs.
-   */
-  public static class RangeInputSplit extends InputSplit implements Writable {
-    private Range range;
-    private String[] locations;
-    
-    public RangeInputSplit() {
-      range = new Range();
-      locations = new String[0];
-    }
-    
-    public Range getRange() {
-      return range;
-    }
-    
-    private static byte[] extractBytes(ByteSequence seq, int numBytes) {
-      byte[] bytes = new byte[numBytes + 1];
-      bytes[0] = 0;
-      for (int i = 0; i < numBytes; i++) {
-        if (i >= seq.length())
-          bytes[i + 1] = 0;
-        else
-          bytes[i + 1] = seq.byteAt(i);
-      }
-      return bytes;
-    }
-    
-    public static float getProgress(ByteSequence start, ByteSequence end, ByteSequence position) {
-      int maxDepth = Math.min(Math.max(end.length(), start.length()), position.length());
-      BigInteger startBI = new BigInteger(extractBytes(start, maxDepth));
-      BigInteger endBI = new BigInteger(extractBytes(end, maxDepth));
-      BigInteger positionBI = new BigInteger(extractBytes(position, maxDepth));
-      return (float) (positionBI.subtract(startBI).doubleValue() / endBI.subtract(startBI).doubleValue());
-    }
-    
-    public float getProgress(Key currentKey) {
-      if (currentKey == null)
-        return 0f;
-      if (range.getStartKey() != null && range.getEndKey() != null) {
-        if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW) != 0) {
-          // just look at the row progress
-          return getProgress(range.getStartKey().getRowData(), range.getEndKey().getRowData(), currentKey.getRowData());
-        } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM) != 0) {
-          // just look at the column family progress
-          return getProgress(range.getStartKey().getColumnFamilyData(), range.getEndKey().getColumnFamilyData(), currentKey.getColumnFamilyData());
-        } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM_COLQUAL) != 0) {
-          // just look at the column qualifier progress
-          return getProgress(range.getStartKey().getColumnQualifierData(), range.getEndKey().getColumnQualifierData(), currentKey.getColumnQualifierData());
-        }
-      }
-      // if we can't figure it out, then claim no progress
-      return 0f;
-    }
-    
-    RangeInputSplit(String table, Range range, String[] locations) {
-      this.range = range;
-      this.locations = locations;
-    }
-    
-    /**
-     * This implementation of length is only an estimate, it does not provide exact values. Do not have your code rely on this return value.
-     */
-    public long getLength() throws IOException {
-      Text startRow = range.isInfiniteStartKey() ? new Text(new byte[] {Byte.MIN_VALUE}) : range.getStartKey().getRow();
-      Text stopRow = range.isInfiniteStopKey() ? new Text(new byte[] {Byte.MAX_VALUE}) : range.getEndKey().getRow();
-      int maxCommon = Math.min(7, Math.min(startRow.getLength(), stopRow.getLength()));
-      long diff = 0;
-      
-      byte[] start = startRow.getBytes();
-      byte[] stop = stopRow.getBytes();
-      for (int i = 0; i < maxCommon; ++i) {
-        diff |= 0xff & (start[i] ^ stop[i]);
-        diff <<= Byte.SIZE;
-      }
-      
-      if (startRow.getLength() != stopRow.getLength())
-        diff |= 0xff;
-      
-      return diff + 1;
-    }
-    
-    public String[] getLocations() throws IOException {
-      return locations;
-    }
-    
-    public void readFields(DataInput in) throws IOException {
-      range.readFields(in);
-      int numLocs = in.readInt();
-      locations = new String[numLocs];
-      for (int i = 0; i < numLocs; ++i)
-        locations[i] = in.readUTF();
-    }
-    
-    public void write(DataOutput out) throws IOException {
-      range.write(out);
-      out.writeInt(locations.length);
-      for (int i = 0; i < locations.length; ++i)
-        out.writeUTF(locations[i]);
+        splits.add(new RangeInputSplit(entry.getKey(), entry.getValue().toArray(new String[0])));
+
+    for (InputSplit inputSplit : splits) {
+      RangeInputSplit split = (RangeInputSplit) inputSplit;
+
+      split.setTable(tableName);
+      split.setOffline(offline);
+      split.setIsolatedScan(isolated);
+      split.setUsesLocalIterators(localIterators);
+      split.setMockInstance(mockInstance);
+      split.setMaxVersions(maxVersions);
+      split.setRowRegex(rowRegex);
+      split.setColfamRegex(colfamRegex);
+      split.setColqualRegex(colqualRegex);
+      split.setValueRegex(valueRegex);
+      split.setFetchedColumns(fetchedColumns);
+      split.setUsername(username);
+      split.setPassword(password);
+      split.setInstanceName(instance.getInstanceName());
+      split.setZooKeepers(instance.getZooKeepers());
+      split.setAuths(auths);
+      split.setIterators(iterators);
+      split.setOptions(options);
+      split.setLogLevel(logLevel);
     }
+
+    return splits;
   }
-  
+
   /**
    * The Class IteratorSetting. Encapsulates specifics for an Accumulo iterator's name & priority.
    */
   static class AccumuloIterator {
-    
+
     private static final String FIELD_SEP = ":";
-    
+
     private int priority;
     private String iteratorClass;
     private String iteratorName;
-    
+
     public AccumuloIterator(int priority, String iteratorClass, String iteratorName) {
       this.priority = priority;
       this.iteratorClass = iteratorClass;
       this.iteratorName = iteratorName;
     }
-    
+
     // Parses out a setting given an string supplied from an earlier toString() call
     public AccumuloIterator(String iteratorSetting) {
       // Parse the string to expand the iterator
@@ -1507,42 +1536,42 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
       iteratorClass = tokenizer.nextToken();
       iteratorName = tokenizer.nextToken();
     }
-    
+
     public int getPriority() {
       return priority;
     }
-    
+
     public String getIteratorClass() {
       return iteratorClass;
     }
-    
+
     public String getIteratorName() {
       return iteratorName;
     }
-    
+
     @Override
     public String toString() {
       return new String(priority + FIELD_SEP + iteratorClass + FIELD_SEP + iteratorName);
     }
-    
+
   }
-  
+
   /**
    * The Class AccumuloIteratorOption. Encapsulates specifics for an Accumulo iterator's optional configuration details - associated via the iteratorName.
    */
   static class AccumuloIteratorOption {
     private static final String FIELD_SEP = ":";
-    
+
     private String iteratorName;
     private String key;
     private String value;
-    
+
     public AccumuloIteratorOption(String iteratorName, String key, String value) {
       this.iteratorName = iteratorName;
       this.key = key;
       this.value = value;
     }
-    
+
     // Parses out an option given a string supplied from an earlier toString() call
     public AccumuloIteratorOption(String iteratorOption) {
       StringTokenizer tokenizer = new StringTokenizer(iteratorOption, FIELD_SEP);
@@ -1554,19 +1583,19 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         throw new RuntimeException(e);
       }
     }
-    
+
     public String getIteratorName() {
       return iteratorName;
     }
-    
+
     public String getKey() {
       return key;
     }
-    
+
     public String getValue() {
       return value;
     }
-    
+
     @Override
     public String toString() {
       try {
@@ -1575,7 +1604,7 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> {
         throw new RuntimeException(e);
       }
     }
-    
+
   }
-  
+
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/73114819/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
----------------------------------------------------------------------
diff --git a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
new file mode 100644
index 0000000..e372801
--- /dev/null
+++ b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
@@ -0,0 +1,493 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mapreduce.InputFormatBase.AccumuloIterator;
+import org.apache.accumulo.core.client.mapreduce.InputFormatBase.AccumuloIteratorOption;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.PartialKey;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.log4j.Level;
+
+/**
+ * The Class RangeInputSplit. Encapsulates an Accumulo range for use in Map Reduce jobs.
+ */
+public class RangeInputSplit extends InputSplit implements Writable {
+  private Range range;
+  private String[] locations;
+  private String table, instanceName, zooKeepers, username;
+  private String rowRegex, colfamRegex, colqualRegex, valueRegex;
+  private byte[] password;
+  private Boolean offline, mockInstance, isolatedScan, localIterators;
+  private Integer maxVersions;
+  private Authorizations auths;
+  private Set<Pair<Text,Text>> fetchedColumns;
+  private List<AccumuloIterator> iterators;
+  private List<AccumuloIteratorOption> options;
+  private Level level;
+
+  public RangeInputSplit() {
+    range = new Range();
+    locations = new String[0];
+  }
+
+  public RangeInputSplit(Range range, String[] locations) {
+    this.range = range;
+    this.locations = locations;
+  }
+
+  public Range getRange() {
+    return range;
+  }
+
+  private static byte[] extractBytes(ByteSequence seq, int numBytes) {
+    byte[] bytes = new byte[numBytes + 1];
+    bytes[0] = 0;
+    for (int i = 0; i < numBytes; i++) {
+      if (i >= seq.length())
+        bytes[i + 1] = 0;
+      else
+        bytes[i + 1] = seq.byteAt(i);
+    }
+    return bytes;
+  }
+
+  public static float getProgress(ByteSequence start, ByteSequence end, ByteSequence position) {
+    int maxDepth = Math.min(Math.max(end.length(), start.length()), position.length());
+    BigInteger startBI = new BigInteger(extractBytes(start, maxDepth));
+    BigInteger endBI = new BigInteger(extractBytes(end, maxDepth));
+    BigInteger positionBI = new BigInteger(extractBytes(position, maxDepth));
+    return (float) (positionBI.subtract(startBI).doubleValue() / endBI.subtract(startBI).doubleValue());
+  }
+
+  public float getProgress(Key currentKey) {
+    if (currentKey == null)
+      return 0f;
+    if (range.getStartKey() != null && range.getEndKey() != null) {
+      if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW) != 0) {
+        // just look at the row progress
+        return getProgress(range.getStartKey().getRowData(), range.getEndKey().getRowData(), currentKey.getRowData());
+      } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM) != 0) {
+        // just look at the column family progress
+        return getProgress(range.getStartKey().getColumnFamilyData(), range.getEndKey().getColumnFamilyData(), currentKey.getColumnFamilyData());
+      } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM_COLQUAL) != 0) {
+        // just look at the column qualifier progress
+        return getProgress(range.getStartKey().getColumnQualifierData(), range.getEndKey().getColumnQualifierData(), currentKey.getColumnQualifierData());
+      }
+    }
+    // if we can't figure it out, then claim no progress
+    return 0f;
+  }
+
+  /**
+   * This implementation of length is only an estimate, it does not provide exact values. Do not have your code rely on this return value.
+   */
+  public long getLength() throws IOException {
+    Text startRow = range.isInfiniteStartKey() ? new Text(new byte[] {Byte.MIN_VALUE}) : range.getStartKey().getRow();
+    Text stopRow = range.isInfiniteStopKey() ? new Text(new byte[] {Byte.MAX_VALUE}) : range.getEndKey().getRow();
+    int maxCommon = Math.min(7, Math.min(startRow.getLength(), stopRow.getLength()));
+    long diff = 0;
+
+    byte[] start = startRow.getBytes();
+    byte[] stop = stopRow.getBytes();
+    for (int i = 0; i < maxCommon; ++i) {
+      diff |= 0xff & (start[i] ^ stop[i]);
+      diff <<= Byte.SIZE;
+    }
+
+    if (startRow.getLength() != stopRow.getLength())
+      diff |= 0xff;
+
+    return diff + 1;
+  }
+
+  public String[] getLocations() throws IOException {
+    return locations;
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    range.readFields(in);
+    int numLocs = in.readInt();
+    locations = new String[numLocs];
+    for (int i = 0; i < numLocs; ++i)
+      locations[i] = in.readUTF();
+    
+    if (in.readBoolean()) {
+      isolatedScan = in.readBoolean();
+    }
+    
+    if (in.readBoolean()) {
+      offline = in.readBoolean();
+    }
+    
+    if (in.readBoolean()) {
+      localIterators = in.readBoolean();
+    }
+    
+    if (in.readBoolean()) {
+      mockInstance = in.readBoolean();
+    }
+    
+    if (in.readBoolean()) {
+      maxVersions = in.readInt();
+    }
+    
+    if (in.readBoolean()) {
+      rowRegex = in.readUTF();
+    }
+    
+    if (in.readBoolean()) {
+      colfamRegex = in.readUTF();
+    }
+    
+    if (in.readBoolean()) {
+      colqualRegex = in.readUTF();
+    }
+    
+    if (in.readBoolean()) {
+      valueRegex = in.readUTF();
+    }
+    
+    if (in.readBoolean()) {
+      int numColumns = in.readInt();
+      String[] columns = new String[numColumns];
+      for (int i = 0; i < numColumns; i++) {
+        columns[i] = in.readUTF();
+      }
+      
+      fetchedColumns = InputFormatBase.deserializeFetchedColumns(columns);
+    }
+    
+    if (in.readBoolean()) {
+      auths = new Authorizations(StringUtils.split(in.readUTF()));
+    }
+    
+    if (in.readBoolean()) {
+      username = in.readUTF();
+    }
+    
+    if (in.readBoolean()) {
+      password = in.readUTF().getBytes();
+    }
+    
+    if (in.readBoolean()) {
+      instanceName = in.readUTF();
+    }
+    
+    if (in.readBoolean()) {
+      zooKeepers = in.readUTF();
+    }
+    
+    if (in.readBoolean()) {
+      level = Level.toLevel(in.readInt());
+    }
+  }
+
+  public void write(DataOutput out) throws IOException {
+    range.write(out);
+    out.writeInt(locations.length);
+    for (int i = 0; i < locations.length; ++i)
+      out.writeUTF(locations[i]);
+    
+    out.writeBoolean(null != isolatedScan);
+    if (null != isolatedScan) {
+      out.writeBoolean(isolatedScan);
+    }
+    
+    out.writeBoolean(null != offline);
+    if (null != offline) {
+      out.writeBoolean(offline);
+    }
+    
+    out.writeBoolean(null != localIterators);
+    if (null != localIterators) {
+      out.writeBoolean(localIterators);
+    }
+    
+    out.writeBoolean(null != mockInstance);
+    if (null != mockInstance) {
+      out.writeBoolean(mockInstance);
+    }
+    
+    out.writeBoolean(null != maxVersions);
+    if (null != maxVersions) {
+      out.writeInt(getMaxVersions());
+    }
+    
+    out.writeBoolean(null != rowRegex);
+    if (null != rowRegex) {
+      out.writeUTF(rowRegex);
+    }
+    
+    out.writeBoolean(null != colfamRegex);
+    if (null != colfamRegex) {
+      out.writeUTF(colfamRegex);
+    }
+    
+    out.writeBoolean(null != colqualRegex);
+    if (null != colqualRegex) {
+      out.writeUTF(colqualRegex);
+    }
+    
+    out.writeBoolean(null != valueRegex);
+    if (null != valueRegex) {
+      out.writeUTF(valueRegex);
+    }
+    
+    out.writeBoolean(null != fetchedColumns);
+    if (null != fetchedColumns) {
+      String[] cols = InputFormatBase.serializeColumns(fetchedColumns);
+      out.writeInt(cols.length);
+      for (String col : cols) {
+        out.writeUTF(col);
+      }
+    }
+    
+    out.writeBoolean(null != auths);
+    if (null != auths) {
+      out.writeUTF(auths.serialize());
+    }
+    
+    out.writeBoolean(null != username);
+    if (null != username) {
+      out.writeUTF(username);
+    }
+    
+    out.writeBoolean(null != password);
+    if (null != password) {
+      out.writeUTF(new String(password));
+    }
+    
+    out.writeBoolean(null != instanceName);
+    if (null != instanceName) {
+      out.writeUTF(instanceName);
+    }
+    
+    out.writeBoolean(null != zooKeepers);
+    if (null != zooKeepers) {
+      out.writeUTF(zooKeepers);
+    }
+    
+    out.writeBoolean(null != level);
+    if (null != level) {
+      out.writeInt(level.toInt());
+    }
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder(128);
+    sb.append("Range: ").append(range);
+    // render the host names; appending the array itself would print only its type and identity hash
+    sb.append(" Locations: ").append(java.util.Arrays.toString(locations)).append(" Table: ").append(table);
+    // TODO finish building of string
+    return sb.toString();
+  }
+
+  public String getTable() {
+    return table;
+  }
+
+  public void setTable(String table) {
+    this.table = table;
+  }
+  
+  public Instance getInstance() {
+    if (null == instanceName) {
+      return null;
+    }
+    // the mock flag is a nullable Boolean that stays null until explicitly set; treat null as "not mock" instead of unboxing it
+    if (Boolean.TRUE.equals(isMockInstance())) {
+      return new MockInstance(getInstanceName());
+    }
+    // a ZooKeeper-backed instance cannot be built without the zooKeepers string
+    if (null == zooKeepers) {
+      return null;
+    }
+    
+    return new ZooKeeperInstance(getInstanceName(), getZooKeepers());
+  }
+
+  public String getInstanceName() {
+    return instanceName;
+  }
+
+  public void setInstanceName(String instanceName) {
+    this.instanceName = instanceName;
+  }
+
+  public String getZooKeepers() {
+    return zooKeepers;
+  }
+
+  public void setZooKeepers(String zooKeepers) {
+    this.zooKeepers = zooKeepers;
+  }
+
+  public String getUsername() {
+    return username;
+  }
+
+  public void setUsername(String username) {
+    this.username = username;
+  }
+
+  public byte[] getPassword() {
+    return password;
+  }
+
+  public void setPassword(byte[] password) {
+    this.password = password;
+  }
+
+  public Boolean isOffline() {
+    return offline;
+  }
+
+  public void setOffline(Boolean offline) {
+    this.offline = offline;
+  }
+
+  public void setLocations(String[] locations) {
+    this.locations = locations;
+  }
+
+  public String getRowRegex() {
+    return rowRegex;
+  }
+
+  public void setRowRegex(String rowRegex) {
+    this.rowRegex = rowRegex;
+  }
+
+  public String getColfamRegex() {
+    return colfamRegex;
+  }
+
+  public void setColfamRegex(String colfamRegex) {
+    this.colfamRegex = colfamRegex;
+  }
+
+  public String getColqualRegex() {
+    return colqualRegex;
+  }
+
+  public void setColqualRegex(String colqualRegex) {
+    this.colqualRegex = colqualRegex;
+  }
+
+  public String getValueRegex() {
+    return valueRegex;
+  }
+
+  public void setValueRegex(String valueRegex) {
+    this.valueRegex = valueRegex;
+  }
+
+  public Boolean isMockInstance() {
+    return mockInstance;
+  }
+
+  public void setMockInstance(Boolean mockInstance) {
+    this.mockInstance = mockInstance;
+  }
+
+  public Boolean isIsolatedScan() {
+    return isolatedScan;
+  }
+
+  public void setIsolatedScan(Boolean isolatedScan) {
+    this.isolatedScan = isolatedScan;
+  }
+
+  public Integer getMaxVersions() {
+    return maxVersions;
+  }
+
+  public void setMaxVersions(Integer maxVersions) {
+    this.maxVersions = maxVersions;
+  }
+
+  public Authorizations getAuths() {
+    return auths;
+  }
+
+  public void setAuths(Authorizations auths) {
+    this.auths = auths;
+  }
+
+  public void setRange(Range range) {
+    this.range = range;
+  }
+
+  public Boolean usesLocalIterators() {
+    return localIterators;
+  }
+
+  public void setUsesLocalIterators(Boolean localIterators) {
+    this.localIterators = localIterators;
+  }
+
+  public Set<Pair<Text,Text>> getFetchedColumns() {
+    return fetchedColumns;
+  }
+
+  public void setFetchedColumns(Set<Pair<Text,Text>> fetchedColumns) {
+    this.fetchedColumns = fetchedColumns;
+  }
+
+  public List<AccumuloIterator> getIterators() {
+    return iterators;
+  }
+
+  public void setIterators(List<AccumuloIterator> iterators) {
+    this.iterators = iterators;
+  }
+
+  public List<AccumuloIteratorOption> getOptions() {
+    return options;
+  }
+
+  public void setOptions(List<AccumuloIteratorOption> options) {
+    this.options = options;
+  }
+  
+  public Level getLogLevel() {
+    return level;
+  }
+  
+  public void setLogLevel(Level level) {
+    this.level = level;
+  }
+}


[2/5] Squashed commit of the following:

Posted by el...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/73114819/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java
----------------------------------------------------------------------
diff --git a/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java b/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java
index ba647e9..7239b01 100644
--- a/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java
+++ b/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java
@@ -20,15 +20,17 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
 import java.util.regex.Pattern;
 
 import org.apache.accumulo.core.client.BatchWriter;
 import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.mapreduce.InputFormatBase.AccumuloIterator;
 import org.apache.accumulo.core.client.mapreduce.InputFormatBase.AccumuloIteratorOption;
-import org.apache.accumulo.core.client.mapreduce.InputFormatBase.RangeInputSplit;
 import org.apache.accumulo.core.client.mapreduce.InputFormatBase.RegexType;
 import org.apache.accumulo.core.client.mock.MockInstance;
 import org.apache.accumulo.core.data.Key;
@@ -36,6 +38,7 @@ import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.iterators.user.WholeRowIterator;
 import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.Pair;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.InputSplit;
@@ -46,15 +49,16 @@ import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.log4j.Level;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Test;
 
 public class AccumuloInputFormatTest {
-  
+
   @After
   public void tearDown() throws Exception {}
-  
+
   /**
    * Test basic setting & getting of max versions.
    * 
@@ -68,7 +72,7 @@ public class AccumuloInputFormatTest {
     int version = AccumuloInputFormat.getMaxVersions(job.getConfiguration());
     assertEquals(1, version);
   }
-  
+
   /**
    * Test max versions with an invalid value.
    * 
@@ -80,7 +84,7 @@ public class AccumuloInputFormatTest {
     JobContext job = new JobContext(new Configuration(), new JobID());
     AccumuloInputFormat.setMaxVersions(job.getConfiguration(), 0);
   }
-  
+
   /**
    * Test no max version configured.
    */
@@ -89,7 +93,7 @@ public class AccumuloInputFormatTest {
     JobContext job = new JobContext(new Configuration(), new JobID());
     assertEquals(-1, AccumuloInputFormat.getMaxVersions(job.getConfiguration()));
   }
-  
+
   /**
    * Check that the iterator configuration is getting stored in the Job conf correctly.
    */
@@ -97,45 +101,45 @@ public class AccumuloInputFormatTest {
   @Test
   public void testSetIterator() {
     JobContext job = new JobContext(new Configuration(), new JobID());
-    
+
     AccumuloInputFormat.setIterator(job, 1, "org.apache.accumulo.core.iterators.WholeRowIterator", "WholeRow");
     Configuration conf = job.getConfiguration();
     String iterators = conf.get("AccumuloInputFormat.iterators");
     assertEquals("1:org.apache.accumulo.core.iterators.WholeRowIterator:WholeRow", iterators);
   }
-  
+
   @Test
   public void testAddIterator() {
     JobContext job = new JobContext(new Configuration(), new JobID());
-    
+
     AccumuloInputFormat.addIterator(job.getConfiguration(), new IteratorSetting(1, "WholeRow", WholeRowIterator.class));
     AccumuloInputFormat.addIterator(job.getConfiguration(), new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
     IteratorSetting iter = new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator");
     iter.addOption("v1", "1");
     iter.addOption("junk", "\0omg:!\\xyzzy");
     AccumuloInputFormat.addIterator(job.getConfiguration(), iter);
-    
+
     List<AccumuloIterator> list = AccumuloInputFormat.getIterators(job.getConfiguration());
-    
+
     // Check the list size
     assertTrue(list.size() == 3);
-    
+
     // Walk the list and make sure our settings are correct
     AccumuloIterator setting = list.get(0);
     assertEquals(1, setting.getPriority());
     assertEquals("org.apache.accumulo.core.iterators.user.WholeRowIterator", setting.getIteratorClass());
     assertEquals("WholeRow", setting.getIteratorName());
-    
+
     setting = list.get(1);
     assertEquals(2, setting.getPriority());
     assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
     assertEquals("Versions", setting.getIteratorName());
-    
+
     setting = list.get(2);
     assertEquals(3, setting.getPriority());
     assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
     assertEquals("Count", setting.getIteratorName());
-    
+
     List<AccumuloIteratorOption> iteratorOptions = AccumuloInputFormat.getIteratorOptions(job.getConfiguration());
     assertEquals(2, iteratorOptions.size());
     assertEquals("Count", iteratorOptions.get(0).getIteratorName());
@@ -145,7 +149,7 @@ public class AccumuloInputFormatTest {
     assertEquals("junk", iteratorOptions.get(1).getKey());
     assertEquals("\0omg:!\\xyzzy", iteratorOptions.get(1).getValue());
   }
-  
+
   /**
    * Test adding iterator options where the keys and values contain both the FIELD_SEPARATOR character (':') and ITERATOR_SEPARATOR (',') characters. There
    * should be no exceptions thrown when trying to parse these types of option entries.
@@ -160,16 +164,16 @@ public class AccumuloInputFormatTest {
     someSetting.addOption(key, value);
     Job job = new Job();
     AccumuloInputFormat.addIterator(job.getConfiguration(), someSetting);
-    
+
     final String rawConfigOpt = new AccumuloIteratorOption("iterator", key, value).toString();
-    
+
     assertEquals(rawConfigOpt, job.getConfiguration().get("AccumuloInputFormat.iterators.options"));
-    
+
     List<AccumuloIteratorOption> opts = AccumuloInputFormat.getIteratorOptions(job.getConfiguration());
     assertEquals(1, opts.size());
     assertEquals(opts.get(0).getKey(), key);
     assertEquals(opts.get(0).getValue(), value);
-    
+
     someSetting.addOption(key + "2", value);
     someSetting.setPriority(2);
     someSetting.setName("it2");
@@ -181,7 +185,7 @@ public class AccumuloInputFormatTest {
       assertEquals(opt.getValue(), value);
     }
   }
-  
+
   /**
    * Test getting iterator settings for multiple iterators set
    */
@@ -189,34 +193,34 @@ public class AccumuloInputFormatTest {
   @Test
   public void testGetIteratorSettings() {
     JobContext job = new JobContext(new Configuration(), new JobID());
-    
+
     AccumuloInputFormat.setIterator(job, 1, "org.apache.accumulo.core.iterators.WholeRowIterator", "WholeRow");
     AccumuloInputFormat.setIterator(job, 2, "org.apache.accumulo.core.iterators.VersioningIterator", "Versions");
     AccumuloInputFormat.setIterator(job, 3, "org.apache.accumulo.core.iterators.CountingIterator", "Count");
-    
+
     List<AccumuloIterator> list = AccumuloInputFormat.getIterators(job);
-    
+
     // Check the list size
     assertTrue(list.size() == 3);
-    
+
     // Walk the list and make sure our settings are correct
     AccumuloIterator setting = list.get(0);
     assertEquals(1, setting.getPriority());
     assertEquals("org.apache.accumulo.core.iterators.WholeRowIterator", setting.getIteratorClass());
     assertEquals("WholeRow", setting.getIteratorName());
-    
+
     setting = list.get(1);
     assertEquals(2, setting.getPriority());
     assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
     assertEquals("Versions", setting.getIteratorName());
-    
+
     setting = list.get(2);
     assertEquals(3, setting.getPriority());
     assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
     assertEquals("Count", setting.getIteratorName());
-    
+
   }
-  
+
   /**
    * Check that the iterator options are getting stored in the Job conf correctly.
    */
@@ -225,12 +229,12 @@ public class AccumuloInputFormatTest {
   public void testSetIteratorOption() {
     JobContext job = new JobContext(new Configuration(), new JobID());
     AccumuloInputFormat.setIteratorOption(job, "someIterator", "aKey", "aValue");
-    
+
     Configuration conf = job.getConfiguration();
     String options = conf.get("AccumuloInputFormat.iterators.options");
     assertEquals(new String("someIterator:aKey:aValue"), options);
   }
-  
+
   /**
    * Test getting iterator options for multiple options set
    */
@@ -238,49 +242,49 @@ public class AccumuloInputFormatTest {
   @Test
   public void testGetIteratorOption() {
     JobContext job = new JobContext(new Configuration(), new JobID());
-    
+
     AccumuloInputFormat.setIteratorOption(job, "iterator1", "key1", "value1");
     AccumuloInputFormat.setIteratorOption(job, "iterator2", "key2", "value2");
     AccumuloInputFormat.setIteratorOption(job, "iterator3", "key3", "value3");
-    
+
     List<AccumuloIteratorOption> list = AccumuloInputFormat.getIteratorOptions(job);
-    
+
     // Check the list size
     assertEquals(3, list.size());
-    
+
     // Walk the list and make sure all the options are correct
     AccumuloIteratorOption option = list.get(0);
     assertEquals("iterator1", option.getIteratorName());
     assertEquals("key1", option.getKey());
     assertEquals("value1", option.getValue());
-    
+
     option = list.get(1);
     assertEquals("iterator2", option.getIteratorName());
     assertEquals("key2", option.getKey());
     assertEquals("value2", option.getValue());
-    
+
     option = list.get(2);
     assertEquals("iterator3", option.getIteratorName());
     assertEquals("key3", option.getKey());
     assertEquals("value3", option.getValue());
   }
-  
+
   @SuppressWarnings("deprecation")
   @Test
   public void testSetRegex() {
     JobContext job = new JobContext(new Configuration(), new JobID());
-    
+
     String regex = ">\"*%<>\'\\";
-    
+
     AccumuloInputFormat.setRegex(job, RegexType.ROW, regex);
-    
+
     assertTrue(regex.equals(AccumuloInputFormat.getRegex(job, RegexType.ROW)));
   }
-  
+
   static class TestMapper extends Mapper<Key,Value,Key,Value> {
     Key key = null;
     int count = 0;
-    
+
     @Override
     protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
       if (key != null)
@@ -291,7 +295,7 @@ public class AccumuloInputFormatTest {
       count++;
     }
   }
-  
+
   @Test
   public void testMap() throws Exception {
     MockInstance mockInstance = new MockInstance("testmapinstance");
@@ -304,20 +308,27 @@ public class AccumuloInputFormatTest {
       bw.addMutation(m);
     }
     bw.close();
-    
+
     Job job = new Job(new Configuration());
     job.setInputFormatClass(AccumuloInputFormat.class);
     job.setMapperClass(TestMapper.class);
     job.setNumReduceTasks(0);
     AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable", new Authorizations());
     AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testmapinstance");
-    
+
     AccumuloInputFormat input = new AccumuloInputFormat();
     List<InputSplit> splits = input.getSplits(job);
     assertEquals(splits.size(), 1);
-    
+
     TestMapper mapper = (TestMapper) job.getMapperClass().newInstance();
     for (InputSplit split : splits) {
+      RangeInputSplit risplit = (RangeInputSplit) split;
+      Assert.assertEquals("root", risplit.getUsername());
+      Assert.assertArrayEquals(new byte[0], risplit.getPassword());
+      Assert.assertEquals("testtable", risplit.getTable());
+      Assert.assertEquals(new Authorizations(), risplit.getAuths());
+      Assert.assertEquals("testmapinstance", risplit.getInstanceName());
+      
       TaskAttemptID id = new TaskAttemptID();
       TaskAttemptContext attempt = new TaskAttemptContext(job.getConfiguration(), id);
       RecordReader<Key,Value> reader = input.createRecordReader(split, attempt);
@@ -326,7 +337,7 @@ public class AccumuloInputFormatTest {
       mapper.run(context);
     }
   }
-  
+
   @Test
   public void testSimple() throws Exception {
     MockInstance mockInstance = new MockInstance("testmapinstance");
@@ -339,7 +350,7 @@ public class AccumuloInputFormatTest {
       bw.addMutation(m);
     }
     bw.close();
-    
+
     JobContext job = new JobContext(new Configuration(), new JobID());
     AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable2", new Authorizations());
     AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testmapinstance");
@@ -348,14 +359,14 @@ public class AccumuloInputFormatTest {
     TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID());
     RecordReader<Key,Value> rr = input.createRecordReader(ris, tac);
     rr.initialize(ris, tac);
-    
+
     TestMapper mapper = new TestMapper();
     Mapper<Key,Value,Key,Value>.Context context = mapper.new Context(job.getConfiguration(), tac.getTaskAttemptID(), rr, null, null, null, ris);
     while (rr.nextKeyValue()) {
       mapper.map(rr.getCurrentKey(), rr.getCurrentValue(), context);
     }
   }
-  
+
   @SuppressWarnings("deprecation")
   @Test
   public void testRegex() throws Exception {
@@ -369,7 +380,7 @@ public class AccumuloInputFormatTest {
       bw.addMutation(m);
     }
     bw.close();
-    
+
     JobContext job = new JobContext(new Configuration(), new JobID());
     AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable3", new Authorizations());
     AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testmapinstance");
@@ -380,10 +391,144 @@ public class AccumuloInputFormatTest {
     TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID());
     RecordReader<Key,Value> rr = input.createRecordReader(ris, tac);
     rr.initialize(ris, tac);
-    
+
     Pattern p = Pattern.compile(regex);
     while (rr.nextKeyValue()) {
-      Assert.assertTrue( p.matcher( rr.getCurrentKey().getRow().toString()).matches());
+      Assert.assertTrue(p.matcher(rr.getCurrentKey().getRow().toString()).matches());
+    }
+  }
+
+  /** Sets every input option on the job, then checks that the single generated split reports the same values. */
+  @SuppressWarnings("deprecation")
+  @Test
+  public void testCorrectRangeInputSplits() throws Exception {
+    final String user = "user";
+    final String tableName = "table";
+    final String instanceName = "instance";
+    final byte[] passwd = "password".getBytes();
+    final String rowPattern = "row.*";
+    final String colfPattern = "colf.*";
+    final String colqPattern = "colq.*";
+    final String valuePattern = "val.*";
+    final Authorizations authorizations = new Authorizations("foo");
+    final Collection<Pair<Text,Text>> columns = Collections.singleton(new Pair<Text,Text>(new Text("foo"), new Text("bar")));
+    final boolean isolatedScan = true;
+    final boolean localIterators = true;
+    final int versions = 5;
+    final Level logLevel = Level.WARN;
+
+    // Create the table in the mock instance before asking the input format for splits.
+    new MockInstance(instanceName).getConnector(user, passwd).tableOperations().create(tableName);
+
+    JobContext job = new JobContext(new Configuration(), new JobID());
+    AccumuloInputFormat.setInputInfo(job, user, passwd, tableName, authorizations);
+    AccumuloInputFormat.setMockInstance(job, instanceName);
+    AccumuloInputFormat.setRegex(job, RegexType.ROW, rowPattern);
+    AccumuloInputFormat.setRegex(job, RegexType.COLUMN_FAMILY, colfPattern);
+    AccumuloInputFormat.setRegex(job, RegexType.COLUMN_QUALIFIER, colqPattern);
+    AccumuloInputFormat.setRegex(job, RegexType.VALUE, valuePattern);
+    AccumuloInputFormat.setIsolated(job, isolatedScan);
+    AccumuloInputFormat.setLocalIterators(job, localIterators);
+    AccumuloInputFormat.setMaxVersions(job, versions);
+    AccumuloInputFormat.fetchColumns(job, columns);
+    AccumuloInputFormat.setLogLevel(job, logLevel);
+
+    List<InputSplit> generated = new AccumuloInputFormat().getSplits(job);
+    Assert.assertEquals(1, generated.size());
+
+    InputSplit first = generated.get(0);
+    Assert.assertEquals(RangeInputSplit.class, first.getClass());
+    RangeInputSplit risplit = (RangeInputSplit) first;
+
+    Assert.assertEquals(user, risplit.getUsername());
+    Assert.assertEquals(tableName, risplit.getTable());
+    Assert.assertArrayEquals(passwd, risplit.getPassword());
+    Assert.assertEquals(authorizations, risplit.getAuths());
+    Assert.assertEquals(instanceName, risplit.getInstanceName());
+    Assert.assertEquals(rowPattern, risplit.getRowRegex());
+    Assert.assertEquals(colfPattern, risplit.getColfamRegex());
+    Assert.assertEquals(colqPattern, risplit.getColqualRegex());
+    Assert.assertEquals(valuePattern, risplit.getValueRegex());
+    Assert.assertEquals(isolatedScan, risplit.isIsolatedScan());
+    Assert.assertEquals(localIterators, risplit.usesLocalIterators());
+    Assert.assertEquals(versions, risplit.getMaxVersions().intValue());
+    Assert.assertEquals(columns, risplit.getFetchedColumns());
+    Assert.assertEquals(logLevel, risplit.getLogLevel());
+  }
+
+  /** Runs a mapper over a default-constructed split; the reader is expected to take its settings from the Job's Configuration. */
+  @Test
+  public void testPartialInputSplitDelegationToConfiguration() throws Exception {
+    MockInstance mockInstance = new MockInstance("testPartialInputSplitDelegationToConfiguration");
+    Connector c = mockInstance.getConnector("root", new byte[] {});
+    c.tableOperations().create("testtable");
+    BatchWriter bw = c.createBatchWriter("testtable", 10000L, 1000L, 4);
+    for (int i = 0; i < 100; i++) {
+      Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+      m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+      bw.addMutation(m);
+    }
+    bw.close();
+
+    Job job = new Job(new Configuration());
+    job.setInputFormatClass(AccumuloInputFormat.class);
+    job.setMapperClass(TestMapper.class);
+    job.setNumReduceTasks(0);
+    AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable", new Authorizations());
+    AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testPartialInputSplitDelegationToConfiguration");
+
+    AccumuloInputFormat input = new AccumuloInputFormat();
+    List<InputSplit> splits = input.getSplits(job);
+    assertEquals(1, splits.size());
+
+    TestMapper mapper = (TestMapper) job.getMapperClass().newInstance();
+    RangeInputSplit emptySplit = new RangeInputSplit();
+
+    // Using an empty split should fall back to the information in the Job's Configuration
+    TaskAttemptID id = new TaskAttemptID();
+    TaskAttemptContext attempt = new TaskAttemptContext(job.getConfiguration(), id);
+    RecordReader<Key,Value> reader = input.createRecordReader(emptySplit, attempt);
+    Mapper<Key,Value,Key,Value>.Context context = mapper.new Context(job.getConfiguration(), id, reader, null, null, null, emptySplit);
+    reader.initialize(emptySplit, context);
+    mapper.run(context);
+  }
+
+  /** A split that carries its own, wrong password is expected to make reading fail with an IOException. */
+  @Test(expected = IOException.class)
+  public void testPartialFailedInputSplitDelegationToConfiguration() throws Exception {
+    MockInstance mockInstance = new MockInstance("testPartialFailedInputSplitDelegationToConfiguration");
+    Connector c = mockInstance.getConnector("root", new byte[] {});
+    c.tableOperations().create("testtable");
+    BatchWriter bw = c.createBatchWriter("testtable", 10000L, 1000L, 4);
+    for (int i = 0; i < 100; i++) {
+      Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+      m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+      bw.addMutation(m);
     }
+    bw.close();
+
+    Job job = new Job(new Configuration());
+    job.setInputFormatClass(AccumuloInputFormat.class);
+    job.setMapperClass(TestMapper.class);
+    job.setNumReduceTasks(0);
+    AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable", new Authorizations());
+    AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testPartialFailedInputSplitDelegationToConfiguration");
+
+    AccumuloInputFormat input = new AccumuloInputFormat();
+    List<InputSplit> splits = input.getSplits(job);
+    assertEquals(1, splits.size());
+
+    TestMapper mapper = (TestMapper) job.getMapperClass().newInstance();
+
+    // The split's password differs from the one in the Job's Configuration; the test expects it to be used, so reading must fail
+    RangeInputSplit mismatchedSplit = new RangeInputSplit();
+    mismatchedSplit.setUsername("root");
+    mismatchedSplit.setPassword("anythingelse".getBytes());
+    TaskAttemptID id = new TaskAttemptID();
+    TaskAttemptContext attempt = new TaskAttemptContext(job.getConfiguration(), id);
+    RecordReader<Key,Value> reader = input.createRecordReader(mismatchedSplit, attempt);
+    Mapper<Key,Value,Key,Value>.Context context = mapper.new Context(job.getConfiguration(), id, reader, null, null, null, mismatchedSplit);
+    reader.initialize(mismatchedSplit, context);
+    mapper.run(context);
   }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/73114819/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java
----------------------------------------------------------------------
diff --git a/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java b/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java
index 0673f1b..d9f9da0 100644
--- a/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java
+++ b/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java
@@ -27,7 +27,6 @@ import java.util.Map.Entry;
 
 import org.apache.accumulo.core.client.BatchWriter;
 import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.mapreduce.InputFormatBase.RangeInputSplit;
 import org.apache.accumulo.core.client.mock.MockInstance;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.KeyValue;

http://git-wip-us.apache.org/repos/asf/accumulo/blob/73114819/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java
----------------------------------------------------------------------
diff --git a/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java b/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java
new file mode 100644
index 0000000..22fb6e1
--- /dev/null
+++ b/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java
@@ -0,0 +1,100 @@
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.hadoop.io.Text;
+import org.apache.log4j.Level;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Round-trips {@link RangeInputSplit} instances through write/readFields and compares the results. */
+public class RangeInputSplitTest {
+
+  /** A split built from just a range and locations must read back with the same range and locations. */
+  @Test
+  public void testSimpleWritable() throws IOException {
+    RangeInputSplit split = new RangeInputSplit(new Range(new Key("a"), new Key("b")), new String[]{"localhost"});
+
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    split.write(dos);
+
+    RangeInputSplit newSplit = new RangeInputSplit();
+
+    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+    DataInputStream dis = new DataInputStream(bais);
+    newSplit.readFields(dis);
+
+    Assert.assertEquals(split.getRange(), newSplit.getRange());
+    Assert.assertTrue(Arrays.equals(split.getLocations(), newSplit.getLocations()));
+  }
+
+  /** Every optional field set on a split must read back unchanged after write/readFields. */
+  @Test
+  public void testAllFieldsWritable() throws IOException {
+    RangeInputSplit split = new RangeInputSplit(new Range(new Key("a"), new Key("b")), new String[]{"localhost"});
+
+    Set<Pair<Text,Text>> fetchedColumns = new HashSet<Pair<Text,Text>>();
+
+    fetchedColumns.add(new Pair<Text,Text>(new Text("colf1"), new Text("colq1")));
+    fetchedColumns.add(new Pair<Text,Text>(new Text("colf2"), new Text("colq2")));
+
+    split.setAuths(new Authorizations("foo"));
+    split.setOffline(true);
+    split.setIsolatedScan(true);
+    split.setUsesLocalIterators(true);
+    split.setMaxVersions(5);
+    split.setRowRegex("row");
+    split.setColfamRegex("colf");
+    split.setColqualRegex("colq");
+    split.setValueRegex("value");
+    split.setFetchedColumns(fetchedColumns);
+    split.setPassword("password".getBytes());
+    split.setUsername("root");
+    split.setInstanceName("instance");
+    split.setMockInstance(true);
+    split.setZooKeepers("localhost");
+    split.setLogLevel(Level.WARN);
+
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    split.write(dos);
+
+    RangeInputSplit newSplit = new RangeInputSplit();
+
+    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+    DataInputStream dis = new DataInputStream(bais);
+    newSplit.readFields(dis);
+
+    Assert.assertEquals(split.getRange(), newSplit.getRange());
+    Assert.assertArrayEquals(split.getLocations(), newSplit.getLocations());
+    // Each getter is compared with the same getter on the deserialized split; the password is compared as raw bytes.
+    Assert.assertEquals(split.getAuths(), newSplit.getAuths());
+    Assert.assertEquals(split.isOffline(), newSplit.isOffline());
+    Assert.assertEquals(split.isIsolatedScan(), newSplit.isIsolatedScan());
+    Assert.assertEquals(split.usesLocalIterators(), newSplit.usesLocalIterators());
+    Assert.assertEquals(split.getMaxVersions(), newSplit.getMaxVersions());
+    Assert.assertEquals(split.getRowRegex(), newSplit.getRowRegex());
+    Assert.assertEquals(split.getColfamRegex(), newSplit.getColfamRegex());
+    Assert.assertEquals(split.getColqualRegex(), newSplit.getColqualRegex());
+    Assert.assertEquals(split.getValueRegex(), newSplit.getValueRegex());
+    Assert.assertEquals(split.getFetchedColumns(), newSplit.getFetchedColumns());
+    Assert.assertArrayEquals(split.getPassword(), newSplit.getPassword());
+    Assert.assertEquals(split.getUsername(), newSplit.getUsername());
+    Assert.assertEquals(split.getInstanceName(), newSplit.getInstanceName());
+    Assert.assertEquals(split.isMockInstance(), newSplit.isMockInstance());
+    Assert.assertEquals(split.getZooKeepers(), newSplit.getZooKeepers());
+    Assert.assertEquals(split.getLogLevel(), newSplit.getLogLevel());
+  }
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/73114819/src/examples/simple/src/test/java/org/apache/accumulo/examples/simple/filedata/ChunkInputFormatTest.java
----------------------------------------------------------------------
diff --git a/src/examples/simple/src/test/java/org/apache/accumulo/examples/simple/filedata/ChunkInputFormatTest.java b/src/examples/simple/src/test/java/org/apache/accumulo/examples/simple/filedata/ChunkInputFormatTest.java
index c31c738..af12302 100644
--- a/src/examples/simple/src/test/java/org/apache/accumulo/examples/simple/filedata/ChunkInputFormatTest.java
+++ b/src/examples/simple/src/test/java/org/apache/accumulo/examples/simple/filedata/ChunkInputFormatTest.java
@@ -30,15 +30,13 @@ import org.apache.accumulo.core.client.BatchWriter;
 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.TableExistsException;
 import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.client.mapreduce.InputFormatBase.RangeInputSplit;
+import org.apache.accumulo.core.client.mapreduce.RangeInputSplit;
 import org.apache.accumulo.core.client.mock.MockInstance;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.security.ColumnVisibility;
-import org.apache.accumulo.examples.simple.filedata.ChunkInputFormat;
-import org.apache.accumulo.examples.simple.filedata.ChunkInputStream;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.JobID;