You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@accumulo.apache.org by cj...@apache.org on 2013/10/16 06:11:18 UTC

[1/2] git commit: ACCUMULO-391 Javadoc and comments

Updated Branches:
  refs/heads/master a9da42921 -> 01e5296d5


ACCUMULO-391 Javadoc and comments


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/fe239249
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/fe239249
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/fe239249

Branch: refs/heads/master
Commit: fe239249b11bc3de48e423c5e9e50f1f9fe00f5e
Parents: a9da429
Author: Corey J. Nolet <cj...@gmail.com>
Authored: Tue Oct 15 23:17:40 2013 -0400
Committer: Corey J. Nolet <cj...@gmail.com>
Committed: Tue Oct 15 23:20:28 2013 -0400

----------------------------------------------------------------------
 .../core/client/mapred/AbstractInputFormat.java | 13 +++--
 .../core/client/mapred/AccumuloInputFormat.java | 11 ++--
 .../mapred/AccumuloMultiTableInputFormat.java   | 21 ++++++-
 .../core/client/mapred/InputFormatBase.java     | 57 ++++++++++++-------
 .../client/mapreduce/AbstractInputFormat.java   |  7 ++-
 .../client/mapreduce/AccumuloInputFormat.java   |  3 -
 .../AccumuloMultiTableInputFormat.java          | 31 +++++------
 .../core/client/mapreduce/BatchScanConfig.java  |  2 -
 .../core/client/mapreduce/InputFormatBase.java  | 58 ++++++++++----------
 .../mapreduce/lib/util/InputConfigurator.java   |  8 +--
 10 files changed, 124 insertions(+), 87 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
index ba1dc6a..d474c85 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
@@ -63,6 +63,10 @@ import org.apache.hadoop.mapred.RecordReader;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 
+/**
+ * An abstract input format to provide shared methods common to all other input format classes. At the very least, any classes inheriting from this class will
+ * need to define their own {@link RecordReader}.
+ */
 public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
   protected static final Class<?> CLASS = AccumuloInputFormat.class;
   protected static final Logger log = Logger.getLogger(CLASS);
@@ -270,7 +274,7 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
    * @return an Accumulo tablet locator
    * @throws org.apache.accumulo.core.client.TableNotFoundException
    *           if the table name set on the configuration doesn't exist
-   * @since 1.5.0
+   * @since 1.6.0
    */
   protected static TabletLocator getTabletLocator(JobConf job, String tableName) throws TableNotFoundException {
     return InputConfigurator.getTabletLocator(CLASS, job, tableName);
@@ -291,11 +295,11 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
   }
 
   /**
-   * Fetches all {@link org.apache.accumulo.core.client.mapreduce.BatchScanConfig}s that have been set on the given Hadoop configuration.
+   * Fetches all {@link BatchScanConfig}s that have been set on the given Hadoop job.
    * 
    * @param job
    *          the Hadoop job instance to be configured
-   * @return
+   * @return the {@link BatchScanConfig} objects set on the job
    * @since 1.6.0
    */
   public static Map<String,BatchScanConfig> getBatchScanConfigs(JobConf job) {
@@ -316,7 +320,7 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
    * @since 1.6.0
    */
   public static BatchScanConfig getBatchScanConfig(JobConf job, String tableName) {
-    return InputConfigurator.getTableQueryConfig(CLASS, job, tableName);
+    return InputConfigurator.getBatchScanConfig(CLASS, job, tableName);
   }
 
   /**
@@ -343,6 +347,7 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
      *          the scanner for which to configure the iterators
      * @param tableName
      *          the table name for which the scanner is configured
+     * @since 1.6.0
      */
     protected abstract void setupIterators(JobConf job, Scanner scanner, String tableName);
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
index f7b6966..cccd7b8 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
@@ -41,18 +41,17 @@ import org.apache.hadoop.mapred.Reporter;
  * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, String)}
  * <li>{@link AccumuloInputFormat#setScanAuthorizations(JobConf, Authorizations)}
  * <li>{@link AccumuloInputFormat#setZooKeeperInstance(JobConf, String, String)} OR {@link AccumuloInputFormat#setMockInstance(JobConf, String)}
- * <li>{@link AccumuloInputFormat#setTableQueryConfigs(JobConf, org.apache.accumulo.core.client.mapreduce.BatchScanConfig...)}</li>
  * </ul>
  * 
  * Other static methods are optional.
  */
 public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
-  
+
   @Override
   public RecordReader<Key,Value> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
     log.setLevel(getLogLevel(job));
     RecordReaderBase<Key,Value> recordReader = new RecordReaderBase<Key,Value>() {
-      
+
       @Override
       public boolean next(Key key, Value value) throws IOException {
         if (scannerIterator.hasNext()) {
@@ -66,17 +65,17 @@ public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
         }
         return false;
       }
-      
+
       @Override
       public Key createKey() {
         return new Key();
       }
-      
+
       @Override
       public Value createValue() {
         return new Value();
       }
-      
+
     };
     recordReader.initialize(split, job);
     return recordReader;

http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
index 4e826b9..ed51866 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
@@ -29,10 +29,27 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 
-public class AccumuloMultiTableInputFormat extends AbstractInputFormat {
+/**
+ * This class allows MapReduce jobs to use multiple Accumulo tables as the source of data. This {@link org.apache.hadoop.mapred.InputFormat} provides keys and
+ * values of type {@link Key} and {@link Value} to the Map function.
+ * 
+ * The user must specify the following via static configurator methods:
+ * 
+ * <ul>
+ * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, org.apache.accumulo.core.client.security.tokens.AuthenticationToken)}
+ * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, String)}
+ * <li>{@link AccumuloInputFormat#setScanAuthorizations(JobConf, org.apache.accumulo.core.security.Authorizations)}
+ * <li>{@link AccumuloInputFormat#setZooKeeperInstance(JobConf, String, String)} OR {@link AccumuloInputFormat#setMockInstance(JobConf, String)}
+ * <li>{@link AccumuloMultiTableInputFormat#setBatchScanConfigs(org.apache.hadoop.mapred.JobConf, java.util.Map)}
+ * </ul>
+ * 
+ * Other static methods are optional.
+ */
+
+public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value> {
 
   /**
-   * Sets the {@link org.apache.accumulo.core.client.mapreduce.BatchScanConfig} objects on the given Hadoop configuration
+   * Sets the {@link BatchScanConfig} objects on the given Hadoop configuration
    * 
    * @param job
    *          the Hadoop job instance to be configured

http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
index 25c23a6..b368bf3 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
@@ -25,6 +25,8 @@ import org.apache.accumulo.core.client.ClientSideIteratorScanner;
 import org.apache.accumulo.core.client.IsolatedScanner;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.impl.TabletLocator;
 import org.apache.accumulo.core.client.mapreduce.lib.util.InputConfigurator;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
@@ -48,7 +50,7 @@ import org.apache.hadoop.mapred.Reporter;
  * See {@link AccumuloInputFormat} for an example implementation.
  */
 public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
-  
+
   /**
    * Sets the name of the input table, over which this job will scan.
    * 
@@ -61,7 +63,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   public static void setInputTableName(JobConf job, String tableName) {
     InputConfigurator.setInputTableName(CLASS, job, tableName);
   }
-  
+
   /**
    * Gets the table name from the configuration.
    * 
@@ -74,6 +76,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected static String getInputTableName(JobConf job) {
     return InputConfigurator.getInputTableName(CLASS, job);
   }
+
   /**
    * Sets the input ranges to scan for this job. If not set, the entire table will be scanned.
    * 
@@ -86,7 +89,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   public static void setRanges(JobConf job, Collection<Range> ranges) {
     InputConfigurator.setRanges(CLASS, job, ranges);
   }
-  
+
   /**
    * Gets the ranges to scan over from a job.
    * 
@@ -101,7 +104,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected static List<Range> getRanges(JobConf job) throws IOException {
     return InputConfigurator.getRanges(CLASS, job);
   }
-  
+
   /**
    * Restricts the columns that will be mapped over for this job.
    * 
@@ -115,7 +118,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   public static void fetchColumns(JobConf job, Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
     InputConfigurator.fetchColumns(CLASS, job, columnFamilyColumnQualifierPairs);
   }
-  
+
   /**
    * Gets the columns to be mapped over from this job.
    * 
@@ -128,7 +131,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected static Set<Pair<Text,Text>> getFetchedColumns(JobConf job) {
     return InputConfigurator.getFetchedColumns(CLASS, job);
   }
-  
+
   /**
    * Encode an iterator on the input for this job.
    * 
@@ -141,7 +144,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   public static void addIterator(JobConf job, IteratorSetting cfg) {
     InputConfigurator.addIterator(CLASS, job, cfg);
   }
-  
+
   /**
    * Gets a list of the iterator settings (for iterators to apply to a scanner) from this configuration.
    * 
@@ -154,7 +157,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected static List<IteratorSetting> getIterators(JobConf job) {
     return InputConfigurator.getIterators(CLASS, job);
   }
-  
+
   /**
    * Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
    * Disabling this feature will cause exactly one Map task to be created for each specified range. The default setting is enabled. *
@@ -172,7 +175,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   public static void setAutoAdjustRanges(JobConf job, boolean enableFeature) {
     InputConfigurator.setAutoAdjustRanges(CLASS, job, enableFeature);
   }
-  
+
   /**
    * Determines whether a configuration has auto-adjust ranges enabled.
    * 
@@ -185,7 +188,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected static boolean getAutoAdjustRanges(JobConf job) {
     return InputConfigurator.getAutoAdjustRanges(CLASS, job);
   }
-  
+
   /**
    * Controls the use of the {@link IsolatedScanner} in this job.
    * 
@@ -201,7 +204,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   public static void setScanIsolation(JobConf job, boolean enableFeature) {
     InputConfigurator.setScanIsolation(CLASS, job, enableFeature);
   }
-  
+
   /**
    * Determines whether a configuration has isolation enabled.
    * 
@@ -214,7 +217,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected static boolean isIsolated(JobConf job) {
     return InputConfigurator.isIsolated(CLASS, job);
   }
-  
+
   /**
    * Controls the use of the {@link ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack to be constructed within the Map
    * task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be available on the classpath for the task.
@@ -231,7 +234,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   public static void setLocalIterators(JobConf job, boolean enableFeature) {
     InputConfigurator.setLocalIterators(CLASS, job, enableFeature);
   }
-  
+
   /**
    * Determines whether a configuration uses local iterators.
    * 
@@ -244,7 +247,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected static boolean usesLocalIterators(JobConf job) {
     return InputConfigurator.usesLocalIterators(CLASS, job);
   }
-  
+
   /**
    * <p>
    * Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
@@ -279,7 +282,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   public static void setOfflineTableScan(JobConf job, boolean enableFeature) {
     InputConfigurator.setOfflineTableScan(CLASS, job, enableFeature);
   }
-  
+
   /**
    * Determines whether a configuration has the offline table scan feature enabled.
    * 
@@ -292,14 +295,30 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected static boolean isOfflineScan(JobConf job) {
     return InputConfigurator.isOfflineScan(CLASS, job);
   }
-  
+
+  /**
+   * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
+   * 
+   * @param job
+   *          the Hadoop job instance from which the configuration is read
+   * @return an Accumulo tablet locator
+   * @throws org.apache.accumulo.core.client.TableNotFoundException
+   *           if the table name set on the job doesn't exist
+   * @since 1.5.0
+   * @deprecated since 1.6.0; use {@link AbstractInputFormat#getTabletLocator(JobConf, String)} instead
+   */
+  @Deprecated
+  protected static TabletLocator getTabletLocator(JobConf job) throws TableNotFoundException {
+    return InputConfigurator.getTabletLocator(CLASS, job, InputConfigurator.getInputTableName(CLASS, job));
+  }
+
   protected abstract static class RecordReaderBase<K,V> extends AbstractRecordReader<K,V> {
 
     @Override
     protected void setupIterators(JobConf job, Scanner scanner, String tableName) {
       setupIterators(job, scanner);
     }
-        
+
     /**
      * Apply the configured iterators from the configuration to the scanner.
      * 
@@ -310,9 +329,9 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
      */
     protected void setupIterators(JobConf job, Scanner scanner) {
       List<IteratorSetting> iterators = getIterators(job);
-      for (IteratorSetting iterator : iterators) 
+      for (IteratorSetting iterator : iterators)
         scanner.addScanIterator(iterator);
     }
   }
-  
+
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
index fad414f..c07a62e 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
@@ -71,6 +71,10 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 
+/**
+ * An abstract input format to provide shared methods common to all other input format classes. At the very least, any classes inheriting from this class will
+ * need to define their own {@link RecordReader}.
+ */
 public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
 
   protected static final Class<?> CLASS = AccumuloInputFormat.class;
@@ -296,7 +300,7 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
    * @since 1.6.0
    */
   protected static BatchScanConfig getBatchScanConfig(JobContext context, String tableName) {
-    return InputConfigurator.getTableQueryConfig(CLASS, getConfiguration(context), tableName);
+    return InputConfigurator.getBatchScanConfig(CLASS, getConfiguration(context), tableName);
   }
 
   /**
@@ -355,6 +359,7 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
      *          the scanner for which to configure the iterators
      * @param tableName
      *          the table name for which the scanner is configured
+     * @since 1.6.0
      */
     protected abstract void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName);
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
index cf4b376..9ecae53 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
@@ -17,11 +17,8 @@
 package org.apache.accumulo.core.client.mapreduce;
 
 import java.io.IOException;
-import java.util.List;
 import java.util.Map.Entry;
 
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.Scanner;
 import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;

http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
index de24f68..06bcd01 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
@@ -16,6 +16,12 @@
  */
 package org.apache.accumulo.core.client.mapreduce;
 
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.Scanner;
 import org.apache.accumulo.core.client.mapreduce.lib.util.InputConfigurator;
@@ -27,17 +33,9 @@ import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
 /**
- * This class allows MapReduce jobs to use multiple Accumulo tables as the source of data. This {@link org.apache.hadoop.mapreduce.InputFormat} provides keys 
- * and values of 
- * type {@link Key} and
- * {@link Value} to the Map function. 
+ * This class allows MapReduce jobs to use multiple Accumulo tables as the source of data. This {@link org.apache.hadoop.mapreduce.InputFormat} provides keys
+ * and values of type {@link Key} and {@link Value} to the Map function.
  * 
  * The user must specify the following via static configurator methods:
  * 
@@ -50,9 +48,8 @@ import static com.google.common.base.Preconditions.checkNotNull;
  * 
  * Other static methods are optional.
  */
+public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value> {
 
-public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value>{
-  
   /**
    * Sets the {@link BatchScanConfig} objects on the given Hadoop configuration
    * 
@@ -62,15 +59,15 @@ public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value
    *          the table query configs to be set on the configuration.
    * @since 1.6.0
    */
-  public static void setBatchScanConfigs(Job job, Map<String, BatchScanConfig> configs) {
+  public static void setBatchScanConfigs(Job job, Map<String,BatchScanConfig> configs) {
     checkNotNull(configs);
     InputConfigurator.setBatchScanConfigs(CLASS, getConfiguration(job), configs);
   }
-  
+
   @Override
-  public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
+  public RecordReader<Key,Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
     log.setLevel(getLogLevel(context));
-    return new AbstractRecordReader<Key, Value>() {
+    return new AbstractRecordReader<Key,Value>() {
       @Override
       public boolean nextKeyValue() throws IOException, InterruptedException {
         if (scannerIterator.hasNext()) {
@@ -88,7 +85,7 @@ public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value
       @Override
       protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName) {
         List<IteratorSetting> iterators = getBatchScanConfig(context, tableName).getIterators();
-        for(IteratorSetting setting : iterators) {
+        for (IteratorSetting setting : iterators) {
           scanner.addScanIterator(setting);
         }
       }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
index feb49bb..e4b89ca 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
@@ -16,8 +16,6 @@
  */
 package org.apache.accumulo.core.client.mapreduce;
 
-import static com.google.common.base.Preconditions.checkNotNull;
-
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;

http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
index af893a8..b85bfd6 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
@@ -310,35 +310,35 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected static TabletLocator getTabletLocator(JobContext context) throws TableNotFoundException {
     return InputConfigurator.getTabletLocator(CLASS, getConfiguration(context), InputConfigurator.getInputTableName(CLASS, getConfiguration(context)));
   }
-  
+
   protected abstract static class RecordReaderBase<K,V> extends AbstractRecordReader<K,V> {
-    
-      /**
-       * Apply the configured iterators from the configuration to the scanner for the specified table name
-       * 
-       * @param context
-       *          the Hadoop context for the configured job
-       * @param scanner
-       *          the scanner to configure
-       * @since 1.6.0
-       */
-      @Override
-      protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName) {
-        setupIterators(context, scanner);
-      }
-      
-      /**
-       * Apply the configured iterators from the configuration to the scanner.
-       * 
-       * @param context
-       *          the Hadoop context for the configured job
-       * @param scanner
-       *          the scanner to configure
-       */
-      protected void setupIterators(TaskAttemptContext context, Scanner scanner) {
-        List<IteratorSetting> iterators = getIterators(context);
-        for (IteratorSetting iterator : iterators) 
-          scanner.addScanIterator(iterator);
-      }
+
+    /**
+     * Apply the configured iterators from the configuration to the scanner. The tableName argument is ignored; this delegates to the two-argument overload.
+     * 
+     * @param context
+     *          the Hadoop context for the configured job
+     * @param scanner
+     *          the scanner to configure
+     * @since 1.6.0
+     */
+    @Override
+    protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName) {
+      setupIterators(context, scanner);
+    }
+
+    /**
+     * Apply the configured iterators from the configuration to the scanner.
+     * 
+     * @param context
+     *          the Hadoop context for the configured job
+     * @param scanner
+     *          the scanner to configure
+     */
+    protected void setupIterators(TaskAttemptContext context, Scanner scanner) {
+      List<IteratorSetting> iterators = getIterators(context);
+      for (IteratorSetting iterator : iterators)
+        scanner.addScanIterator(iterator);
+    }
   }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/util/InputConfigurator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/util/InputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/util/InputConfigurator.java
index 4dac750..016efa5 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/util/InputConfigurator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/util/InputConfigurator.java
@@ -485,7 +485,7 @@ public class InputConfigurator extends ConfiguratorBase {
    * @param conf
    *          the Hadoop configuration object to configure
    * @param configs
-   *          an array of {@link org.apache.accumulo.core.client.mapreduce.BatchScanConfig} objects to associate with the job
+   *          a map of table names to {@link BatchScanConfig} objects to associate with the job
    * @since 1.6.0
    */
   public static void setBatchScanConfigs(Class<?> implementingClass, Configuration conf, Map<String,BatchScanConfig> configs) {
@@ -505,7 +505,7 @@ public class InputConfigurator extends ConfiguratorBase {
   }
 
   /**
-   * Returns all {@link org.apache.accumulo.core.client.mapreduce.BatchScanConfig} objects associated with this job.
+   * Returns all {@link BatchScanConfig} objects associated with this job.
    * 
    * @param implementingClass
    *          the class whose name will be used as a prefix for the property configuration key
@@ -538,7 +538,7 @@ public class InputConfigurator extends ConfiguratorBase {
   }
 
   /**
-   * Returns the {@link org.apache.accumulo.core.client.mapreduce.BatchScanConfig} for the given table
+   * Returns the {@link BatchScanConfig} for the given table
    * 
    * @param implementingClass
    *          the class whose name will be used as a prefix for the property configuration key
@@ -549,7 +549,7 @@ public class InputConfigurator extends ConfiguratorBase {
    * @return the table query config for the given table name (if it exists) and null if it does not
    * @since 1.6.0
    */
-  public static BatchScanConfig getTableQueryConfig(Class<?> implementingClass, Configuration conf, String tableName) {
+  public static BatchScanConfig getBatchScanConfig(Class<?> implementingClass, Configuration conf, String tableName) {
     Map<String,BatchScanConfig> queryConfigs = getBatchScanConfigs(implementingClass, conf);
     return queryConfigs.get(tableName);
   }

[2/2] git commit: ACCUMULO-1746 Adding documentation for MultiTableInputFormat

Posted by cj...@apache.org.

ACCUMULO-1746 Adding documentation for MultiTableInputFormat


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/01e5296d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/01e5296d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/01e5296d

Branch: refs/heads/master
Commit: 01e5296d5b12d20ea98425e652a472ddbfa954c6
Parents: fe23924
Author: Corey J. Nolet <cj...@gmail.com>
Authored: Wed Oct 16 00:07:07 2013 -0400
Committer: Corey J. Nolet <cj...@gmail.com>
Committed: Wed Oct 16 00:07:07 2013 -0400

----------------------------------------------------------------------
 .../core/client/mapreduce/BatchScanConfig.java  |  7 +-
 .../accumulo_user_manual/chapters/analytics.tex | 80 ++++++++++++++++++++
 2 files changed, 84 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/01e5296d/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
index e4b89ca..985df55 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
@@ -20,6 +20,7 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -37,7 +38,7 @@ public class BatchScanConfig implements Writable {
 
   private List<IteratorSetting> iterators;
   private List<Range> ranges;
-  private Set<Pair<Text,Text>> columns;
+  private Collection<Pair<Text,Text>> columns;
 
   private boolean autoAdjustRanges = true;
   private boolean useLocalIterators = false;
@@ -83,7 +84,7 @@ public class BatchScanConfig implements Writable {
    *          selected. An empty set is the default and is equivalent to scanning all columns.
    * @since 1.6.0
    */
-  public BatchScanConfig fetchColumns(Set<Pair<Text,Text>> columns) {
+  public BatchScanConfig fetchColumns(Collection<Pair<Text,Text>> columns) {
     this.columns = columns;
     return this;
   }
@@ -91,7 +92,7 @@ public class BatchScanConfig implements Writable {
   /**
    * Returns the columns to be fetched for this configuration
    */
-  public Set<Pair<Text,Text>> getFetchedColumns() {
+  public Collection<Pair<Text,Text>> getFetchedColumns() {
     return columns != null ? columns : new HashSet<Pair<Text,Text>>();
   }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/01e5296d/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
----------------------------------------------------------------------
diff --git a/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex b/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
index d10baa8..7bbd177 100644
--- a/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
+++ b/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
@@ -125,6 +125,86 @@ AccumuloInputFormat.addIterator(job, is);
 \end{verbatim}
 \normalsize
 
+\subsection{AccumuloMultiTableInputFormat options}
+
+The AccumuloMultiTableInputFormat allows scanning over multiple tables 
+in a single MapReduce job. Separate ranges, columns, and iterators can be 
+used for each table. 
+
+\small
+\begin{verbatim}
+BatchScanConfig tableOneConfig = new BatchScanConfig();
+BatchScanConfig tableTwoConfig = new BatchScanConfig();
+\end{verbatim}
+\normalsize
+
+To set the configuration objects on the job:
+
+\small
+\begin{verbatim}
+Map<String, BatchScanConfig> configs = new HashMap<String,BatchScanConfig>();
+configs.put("table1", tableOneConfig);
+configs.put("table2", tableTwoConfig);
+AccumuloMultiTableInputFormat.setBatchScanConfigs(job, configs);
+\end{verbatim}
+\normalsize
+
+\Large
+\textbf{Optional settings:}
+\normalsize
+
+To restrict to a set of ranges:
+
+\small
+\begin{verbatim}
+ArrayList<Range> tableOneRanges = new ArrayList<Range>();
+ArrayList<Range> tableTwoRanges = new ArrayList<Range>();
+// populate array lists of row ranges for tables...
+tableOneConfig.setRanges(tableOneRanges);
+tableTwoConfig.setRanges(tableTwoRanges);
+\end{verbatim}
+\normalsize
+
+To restrict Accumulo to a list of columns:
+
+\small
+\begin{verbatim}
+ArrayList<Pair<Text,Text>> tableOneColumns = new ArrayList<Pair<Text,Text>>();
+ArrayList<Pair<Text,Text>> tableTwoColumns = new ArrayList<Pair<Text,Text>>();
+// populate lists of columns for each of the tables ...
+tableOneConfig.fetchColumns(tableOneColumns);
+tableTwoConfig.fetchColumns(tableTwoColumns);
+\end{verbatim}
+\normalsize
+
+To set scan iterators:
+
+\small
+\begin{verbatim}
+List<IteratorSetting> tableOneIterators = new ArrayList<IteratorSetting>();
+List<IteratorSetting> tableTwoIterators = new ArrayList<IteratorSetting>();
+// populate the lists of iterator settings for each of the tables ...
+tableOneConfig.setIterators(tableOneIterators);
+tableTwoConfig.setIterators(tableTwoIterators);
+\end{verbatim}
+\normalsize
+
+
+The name of the table can be retrieved from the input split:
+
+\small
+\begin{verbatim}
+class MyMapper extends Mapper<Key,Value,WritableComparable,Writable> {
+    public void map(Key k, Value v, Context c) {
+        RangeInputSplit split = (RangeInputSplit)c.getInputSplit();
+        String tableName = split.getTableName();
+        // do something with table name 
+    }
+}
+\end{verbatim}
+\normalsize
+
+
 \subsection{AccumuloOutputFormat options}
 
 \small