You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by cj...@apache.org on 2013/10/16 06:11:18 UTC
[1/2] git commit: ACCUMULO-391 Javadoc and comments
Updated Branches:
refs/heads/master a9da42921 -> 01e5296d5
ACCUMULO-391 Javadoc and comments
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/fe239249
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/fe239249
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/fe239249
Branch: refs/heads/master
Commit: fe239249b11bc3de48e423c5e9e50f1f9fe00f5e
Parents: a9da429
Author: Corey J. Nolet <cj...@gmail.com>
Authored: Tue Oct 15 23:17:40 2013 -0400
Committer: Corey J. Nolet <cj...@gmail.com>
Committed: Tue Oct 15 23:20:28 2013 -0400
----------------------------------------------------------------------
.../core/client/mapred/AbstractInputFormat.java | 13 +++--
.../core/client/mapred/AccumuloInputFormat.java | 11 ++--
.../mapred/AccumuloMultiTableInputFormat.java | 21 ++++++-
.../core/client/mapred/InputFormatBase.java | 57 ++++++++++++-------
.../client/mapreduce/AbstractInputFormat.java | 7 ++-
.../client/mapreduce/AccumuloInputFormat.java | 3 -
.../AccumuloMultiTableInputFormat.java | 31 +++++------
.../core/client/mapreduce/BatchScanConfig.java | 2 -
.../core/client/mapreduce/InputFormatBase.java | 58 ++++++++++----------
.../mapreduce/lib/util/InputConfigurator.java | 8 +--
10 files changed, 124 insertions(+), 87 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
index ba1dc6a..d474c85 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
@@ -63,6 +63,10 @@ import org.apache.hadoop.mapred.RecordReader;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
+/**
+ * An abstract input format to provide shared methods common to all other input format classes. At the very least, any classes inheriting from this class will
+ * need to define their own {@link RecordReader}.
+ */
public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
protected static final Class<?> CLASS = AccumuloInputFormat.class;
protected static final Logger log = Logger.getLogger(CLASS);
@@ -270,7 +274,7 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
* @return an Accumulo tablet locator
* @throws org.apache.accumulo.core.client.TableNotFoundException
* if the table name set on the configuration doesn't exist
- * @since 1.5.0
+ * @since 1.6.0
*/
protected static TabletLocator getTabletLocator(JobConf job, String tableName) throws TableNotFoundException {
return InputConfigurator.getTabletLocator(CLASS, job, tableName);
@@ -291,11 +295,11 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
}
/**
- * Fetches all {@link org.apache.accumulo.core.client.mapreduce.BatchScanConfig}s that have been set on the given Hadoop configuration.
+ * Fetches all {@link BatchScanConfig}s that have been set on the given Hadoop job.
*
* @param job
* the Hadoop job instance to be configured
- * @return
+ * @return the {@link BatchScanConfig} objects set on the job
* @since 1.6.0
*/
public static Map<String,BatchScanConfig> getBatchScanConfigs(JobConf job) {
@@ -316,7 +320,7 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
* @since 1.6.0
*/
public static BatchScanConfig getBatchScanConfig(JobConf job, String tableName) {
- return InputConfigurator.getTableQueryConfig(CLASS, job, tableName);
+ return InputConfigurator.getBatchScanConfig(CLASS, job, tableName);
}
/**
@@ -343,6 +347,7 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
* the scanner for which to configure the iterators
* @param tableName
* the table name for which the scanner is configured
+ * @since 1.6.0
*/
protected abstract void setupIterators(JobConf job, Scanner scanner, String tableName);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
index f7b6966..cccd7b8 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
@@ -41,18 +41,17 @@ import org.apache.hadoop.mapred.Reporter;
* <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, String)}
* <li>{@link AccumuloInputFormat#setScanAuthorizations(JobConf, Authorizations)}
* <li>{@link AccumuloInputFormat#setZooKeeperInstance(JobConf, String, String)} OR {@link AccumuloInputFormat#setMockInstance(JobConf, String)}
- * <li>{@link AccumuloInputFormat#setTableQueryConfigs(JobConf, org.apache.accumulo.core.client.mapreduce.BatchScanConfig...)}</li>
* </ul>
*
* Other static methods are optional.
*/
public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
-
+
@Override
public RecordReader<Key,Value> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
log.setLevel(getLogLevel(job));
RecordReaderBase<Key,Value> recordReader = new RecordReaderBase<Key,Value>() {
-
+
@Override
public boolean next(Key key, Value value) throws IOException {
if (scannerIterator.hasNext()) {
@@ -66,17 +65,17 @@ public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
}
return false;
}
-
+
@Override
public Key createKey() {
return new Key();
}
-
+
@Override
public Value createValue() {
return new Value();
}
-
+
};
recordReader.initialize(split, job);
return recordReader;
http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
index 4e826b9..ed51866 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
@@ -29,10 +29,27 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
-public class AccumuloMultiTableInputFormat extends AbstractInputFormat {
+/**
+ * This class allows MapReduce jobs to use multiple Accumulo tables as the source of data. This {@link org.apache.hadoop.mapred.InputFormat} provides keys and
+ * values of type {@link Key} and {@link Value} to the Map function.
+ *
+ * The user must specify the following via static configurator methods:
+ *
+ * <ul>
+ * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, org.apache.accumulo.core.client.security.tokens.AuthenticationToken)}
+ * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, String)}
+ * <li>{@link AccumuloInputFormat#setScanAuthorizations(JobConf, org.apache.accumulo.core.security.Authorizations)}
+ * <li>{@link AccumuloInputFormat#setZooKeeperInstance(JobConf, String, String)} OR {@link AccumuloInputFormat#setMockInstance(JobConf, String)}
+ * <li>{@link AccumuloMultiTableInputFormat#setBatchScanConfigs(org.apache.hadoop.mapred.JobConf, java.util.Map)}
+ * </ul>
+ *
+ * Other static methods are optional.
+ */
+
+public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value> {
/**
- * Sets the {@link org.apache.accumulo.core.client.mapreduce.BatchScanConfig} objects on the given Hadoop configuration
+ * Sets the {@link BatchScanConfig} objects on the given Hadoop configuration
*
* @param job
* the Hadoop job instance to be configured
http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
index 25c23a6..b368bf3 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
@@ -25,6 +25,8 @@ import org.apache.accumulo.core.client.ClientSideIteratorScanner;
import org.apache.accumulo.core.client.IsolatedScanner;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.impl.TabletLocator;
import org.apache.accumulo.core.client.mapreduce.lib.util.InputConfigurator;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
@@ -48,7 +50,7 @@ import org.apache.hadoop.mapred.Reporter;
* See {@link AccumuloInputFormat} for an example implementation.
*/
public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
-
+
/**
* Sets the name of the input table, over which this job will scan.
*
@@ -61,7 +63,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
public static void setInputTableName(JobConf job, String tableName) {
InputConfigurator.setInputTableName(CLASS, job, tableName);
}
-
+
/**
* Gets the table name from the configuration.
*
@@ -74,6 +76,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
protected static String getInputTableName(JobConf job) {
return InputConfigurator.getInputTableName(CLASS, job);
}
+
/**
* Sets the input ranges to scan for this job. If not set, the entire table will be scanned.
*
@@ -86,7 +89,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
public static void setRanges(JobConf job, Collection<Range> ranges) {
InputConfigurator.setRanges(CLASS, job, ranges);
}
-
+
/**
* Gets the ranges to scan over from a job.
*
@@ -101,7 +104,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
protected static List<Range> getRanges(JobConf job) throws IOException {
return InputConfigurator.getRanges(CLASS, job);
}
-
+
/**
* Restricts the columns that will be mapped over for this job.
*
@@ -115,7 +118,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
public static void fetchColumns(JobConf job, Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
InputConfigurator.fetchColumns(CLASS, job, columnFamilyColumnQualifierPairs);
}
-
+
/**
* Gets the columns to be mapped over from this job.
*
@@ -128,7 +131,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
protected static Set<Pair<Text,Text>> getFetchedColumns(JobConf job) {
return InputConfigurator.getFetchedColumns(CLASS, job);
}
-
+
/**
* Encode an iterator on the input for this job.
*
@@ -141,7 +144,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
public static void addIterator(JobConf job, IteratorSetting cfg) {
InputConfigurator.addIterator(CLASS, job, cfg);
}
-
+
/**
* Gets a list of the iterator settings (for iterators to apply to a scanner) from this configuration.
*
@@ -154,7 +157,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
protected static List<IteratorSetting> getIterators(JobConf job) {
return InputConfigurator.getIterators(CLASS, job);
}
-
+
/**
* Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
* Disabling this feature will cause exactly one Map task to be created for each specified range. The default setting is enabled. *
@@ -172,7 +175,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
public static void setAutoAdjustRanges(JobConf job, boolean enableFeature) {
InputConfigurator.setAutoAdjustRanges(CLASS, job, enableFeature);
}
-
+
/**
* Determines whether a configuration has auto-adjust ranges enabled.
*
@@ -185,7 +188,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
protected static boolean getAutoAdjustRanges(JobConf job) {
return InputConfigurator.getAutoAdjustRanges(CLASS, job);
}
-
+
/**
* Controls the use of the {@link IsolatedScanner} in this job.
*
@@ -201,7 +204,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
public static void setScanIsolation(JobConf job, boolean enableFeature) {
InputConfigurator.setScanIsolation(CLASS, job, enableFeature);
}
-
+
/**
* Determines whether a configuration has isolation enabled.
*
@@ -214,7 +217,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
protected static boolean isIsolated(JobConf job) {
return InputConfigurator.isIsolated(CLASS, job);
}
-
+
/**
* Controls the use of the {@link ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack to be constructed within the Map
* task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be available on the classpath for the task.
@@ -231,7 +234,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
public static void setLocalIterators(JobConf job, boolean enableFeature) {
InputConfigurator.setLocalIterators(CLASS, job, enableFeature);
}
-
+
/**
* Determines whether a configuration uses local iterators.
*
@@ -244,7 +247,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
protected static boolean usesLocalIterators(JobConf job) {
return InputConfigurator.usesLocalIterators(CLASS, job);
}
-
+
/**
* <p>
* Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
@@ -279,7 +282,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
public static void setOfflineTableScan(JobConf job, boolean enableFeature) {
InputConfigurator.setOfflineTableScan(CLASS, job, enableFeature);
}
-
+
/**
* Determines whether a configuration has the offline table scan feature enabled.
*
@@ -292,14 +295,30 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
protected static boolean isOfflineScan(JobConf job) {
return InputConfigurator.isOfflineScan(CLASS, job);
}
-
+
+ /**
+ * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
+ *
+ * @param job
+ * the Hadoop job instance holding the configuration
+ * @return an Accumulo tablet locator
+ * @throws org.apache.accumulo.core.client.TableNotFoundException
+ * if the table name set on the job doesn't exist
+ * @since 1.5.0
+ * @deprecated since 1.6.0
+ */
+ @Deprecated
+ protected static TabletLocator getTabletLocator(JobConf job) throws TableNotFoundException {
+ return InputConfigurator.getTabletLocator(CLASS, job, InputConfigurator.getInputTableName(CLASS, job));
+ }
+
protected abstract static class RecordReaderBase<K,V> extends AbstractRecordReader<K,V> {
@Override
protected void setupIterators(JobConf job, Scanner scanner, String tableName) {
setupIterators(job, scanner);
}
-
+
/**
* Apply the configured iterators from the configuration to the scanner.
*
@@ -310,9 +329,9 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
*/
protected void setupIterators(JobConf job, Scanner scanner) {
List<IteratorSetting> iterators = getIterators(job);
- for (IteratorSetting iterator : iterators)
+ for (IteratorSetting iterator : iterators)
scanner.addScanIterator(iterator);
}
}
-
+
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
index fad414f..c07a62e 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
@@ -71,6 +71,10 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
+/**
+ * An abstract input format to provide shared methods common to all other input format classes. At the very least, any classes inheriting from this class will
+ * need to define their own {@link RecordReader}.
+ */
public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
protected static final Class<?> CLASS = AccumuloInputFormat.class;
@@ -296,7 +300,7 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
* @since 1.6.0
*/
protected static BatchScanConfig getBatchScanConfig(JobContext context, String tableName) {
- return InputConfigurator.getTableQueryConfig(CLASS, getConfiguration(context), tableName);
+ return InputConfigurator.getBatchScanConfig(CLASS, getConfiguration(context), tableName);
}
/**
@@ -355,6 +359,7 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
* the scanner for which to configure the iterators
* @param tableName
* the table name for which the scanner is configured
+ * @since 1.6.0
*/
protected abstract void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
index cf4b376..9ecae53 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
@@ -17,11 +17,8 @@
package org.apache.accumulo.core.client.mapreduce;
import java.io.IOException;
-import java.util.List;
import java.util.Map.Entry;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
index de24f68..06bcd01 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
@@ -16,6 +16,12 @@
*/
package org.apache.accumulo.core.client.mapreduce;
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.mapreduce.lib.util.InputConfigurator;
@@ -27,17 +33,9 @@ import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
/**
- * This class allows MapReduce jobs to use multiple Accumulo tables as the source of data. This {@link org.apache.hadoop.mapreduce.InputFormat} provides keys
- * and values of
- * type {@link Key} and
- * {@link Value} to the Map function.
+ * This class allows MapReduce jobs to use multiple Accumulo tables as the source of data. This {@link org.apache.hadoop.mapreduce.InputFormat} provides keys
+ * and values of type {@link Key} and {@link Value} to the Map function.
*
* The user must specify the following via static configurator methods:
*
@@ -50,9 +48,8 @@ import static com.google.common.base.Preconditions.checkNotNull;
*
* Other static methods are optional.
*/
+public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value> {
-public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value>{
-
/**
* Sets the {@link BatchScanConfig} objects on the given Hadoop configuration
*
@@ -62,15 +59,15 @@ public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value
* the table query configs to be set on the configuration.
* @since 1.6.0
*/
- public static void setBatchScanConfigs(Job job, Map<String, BatchScanConfig> configs) {
+ public static void setBatchScanConfigs(Job job, Map<String,BatchScanConfig> configs) {
checkNotNull(configs);
InputConfigurator.setBatchScanConfigs(CLASS, getConfiguration(job), configs);
}
-
+
@Override
- public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
+ public RecordReader<Key,Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
log.setLevel(getLogLevel(context));
- return new AbstractRecordReader<Key, Value>() {
+ return new AbstractRecordReader<Key,Value>() {
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
if (scannerIterator.hasNext()) {
@@ -88,7 +85,7 @@ public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value
@Override
protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName) {
List<IteratorSetting> iterators = getBatchScanConfig(context, tableName).getIterators();
- for(IteratorSetting setting : iterators) {
+ for (IteratorSetting setting : iterators) {
scanner.addScanIterator(setting);
}
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
index feb49bb..e4b89ca 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
@@ -16,8 +16,6 @@
*/
package org.apache.accumulo.core.client.mapreduce;
-import static com.google.common.base.Preconditions.checkNotNull;
-
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
index af893a8..b85bfd6 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
@@ -310,35 +310,35 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
protected static TabletLocator getTabletLocator(JobContext context) throws TableNotFoundException {
return InputConfigurator.getTabletLocator(CLASS, getConfiguration(context), InputConfigurator.getInputTableName(CLASS, getConfiguration(context)));
}
-
+
protected abstract static class RecordReaderBase<K,V> extends AbstractRecordReader<K,V> {
-
- /**
- * Apply the configured iterators from the configuration to the scanner for the specified table name
- *
- * @param context
- * the Hadoop context for the configured job
- * @param scanner
- * the scanner to configure
- * @since 1.6.0
- */
- @Override
- protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName) {
- setupIterators(context, scanner);
- }
-
- /**
- * Apply the configured iterators from the configuration to the scanner.
- *
- * @param context
- * the Hadoop context for the configured job
- * @param scanner
- * the scanner to configure
- */
- protected void setupIterators(TaskAttemptContext context, Scanner scanner) {
- List<IteratorSetting> iterators = getIterators(context);
- for (IteratorSetting iterator : iterators)
- scanner.addScanIterator(iterator);
- }
+
+ /**
+ * Apply the configured iterators from the configuration to the scanner for the specified table name
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @param scanner
+ * the scanner to configure
+ * @since 1.6.0
+ */
+ @Override
+ protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName) {
+ setupIterators(context, scanner);
+ }
+
+ /**
+ * Apply the configured iterators from the configuration to the scanner.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @param scanner
+ * the scanner to configure
+ */
+ protected void setupIterators(TaskAttemptContext context, Scanner scanner) {
+ List<IteratorSetting> iterators = getIterators(context);
+ for (IteratorSetting iterator : iterators)
+ scanner.addScanIterator(iterator);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/fe239249/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/util/InputConfigurator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/util/InputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/util/InputConfigurator.java
index 4dac750..016efa5 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/util/InputConfigurator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/util/InputConfigurator.java
@@ -485,7 +485,7 @@ public class InputConfigurator extends ConfiguratorBase {
* @param conf
* the Hadoop configuration object to configure
* @param configs
- * an array of {@link org.apache.accumulo.core.client.mapreduce.BatchScanConfig} objects to associate with the job
+ * a map of table names to {@link BatchScanConfig} objects to associate with the job
* @since 1.6.0
*/
public static void setBatchScanConfigs(Class<?> implementingClass, Configuration conf, Map<String,BatchScanConfig> configs) {
@@ -505,7 +505,7 @@ public class InputConfigurator extends ConfiguratorBase {
}
/**
- * Returns all {@link org.apache.accumulo.core.client.mapreduce.BatchScanConfig} objects associated with this job.
+ * Returns all {@link BatchScanConfig} objects associated with this job.
*
* @param implementingClass
* the class whose name will be used as a prefix for the property configuration key
@@ -538,7 +538,7 @@ public class InputConfigurator extends ConfiguratorBase {
}
/**
- * Returns the {@link org.apache.accumulo.core.client.mapreduce.BatchScanConfig} for the given table
+ * Returns the {@link BatchScanConfig} for the given table
*
* @param implementingClass
* the class whose name will be used as a prefix for the property configuration key
@@ -549,7 +549,7 @@ public class InputConfigurator extends ConfiguratorBase {
* @return the table query config for the given table name (if it exists) and null if it does not
* @since 1.6.0
*/
- public static BatchScanConfig getTableQueryConfig(Class<?> implementingClass, Configuration conf, String tableName) {
+ public static BatchScanConfig getBatchScanConfig(Class<?> implementingClass, Configuration conf, String tableName) {
Map<String,BatchScanConfig> queryConfigs = getBatchScanConfigs(implementingClass, conf);
return queryConfigs.get(tableName);
}
[2/2] git commit: ACCUMULO-1746 Adding documentation for
MultiTableInputFormat
Posted by cj...@apache.org.
ACCUMULO-1746 Adding documentation for MultiTableInputFormat
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/01e5296d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/01e5296d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/01e5296d
Branch: refs/heads/master
Commit: 01e5296d5b12d20ea98425e652a472ddbfa954c6
Parents: fe23924
Author: Corey J. Nolet <cj...@gmail.com>
Authored: Wed Oct 16 00:07:07 2013 -0400
Committer: Corey J. Nolet <cj...@gmail.com>
Committed: Wed Oct 16 00:07:07 2013 -0400
----------------------------------------------------------------------
.../core/client/mapreduce/BatchScanConfig.java | 7 +-
.../accumulo_user_manual/chapters/analytics.tex | 80 ++++++++++++++++++++
2 files changed, 84 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo/blob/01e5296d/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
index e4b89ca..985df55 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
@@ -20,6 +20,7 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -37,7 +38,7 @@ public class BatchScanConfig implements Writable {
private List<IteratorSetting> iterators;
private List<Range> ranges;
- private Set<Pair<Text,Text>> columns;
+ private Collection<Pair<Text,Text>> columns;
private boolean autoAdjustRanges = true;
private boolean useLocalIterators = false;
@@ -83,7 +84,7 @@ public class BatchScanConfig implements Writable {
* selected. An empty set is the default and is equivalent to scanning all the columns.
* @since 1.6.0
*/
- public BatchScanConfig fetchColumns(Set<Pair<Text,Text>> columns) {
+ public BatchScanConfig fetchColumns(Collection<Pair<Text,Text>> columns) {
this.columns = columns;
return this;
}
@@ -91,7 +92,7 @@ public class BatchScanConfig implements Writable {
/**
* Returns the columns to be fetched for this configuration
*/
- public Set<Pair<Text,Text>> getFetchedColumns() {
+ public Collection<Pair<Text,Text>> getFetchedColumns() {
return columns != null ? columns : new HashSet<Pair<Text,Text>>();
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/01e5296d/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
----------------------------------------------------------------------
diff --git a/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex b/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
index d10baa8..7bbd177 100644
--- a/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
+++ b/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
@@ -125,6 +125,86 @@ AccumuloInputFormat.addIterator(job, is);
\end{verbatim}
\normalsize
+\subsection{AccumuloMultiTableInputFormat options}
+
+The AccumuloMultiTableInputFormat allows scanning over multiple tables
+in a single MapReduce job. Separate ranges, columns, and iterators can be
+used for each table.
+
+\small
+\begin{verbatim}
+BatchScanConfig tableOneConfig = new BatchScanConfig();
+BatchScanConfig tableTwoConfig = new BatchScanConfig();
+\end{verbatim}
+\normalsize
+
+To set the configuration objects on the job:
+
+\small
+\begin{verbatim}
+Map<String, BatchScanConfig> configs = new HashMap<String,BatchScanConfig>();
+configs.put("table1", tableOneConfig);
+configs.put("table2", tableTwoConfig);
+AccumuloMultiTableInputFormat.setBatchScanConfigs(job, configs);
+\end{verbatim}
+\normalsize
+
+\Large
+\textbf{Optional settings:}
+\normalsize
+
+To restrict to a set of ranges:
+
+\small
+\begin{verbatim}
+ArrayList<Range> tableOneRanges = new ArrayList<Range>();
+ArrayList<Range> tableTwoRanges = new ArrayList<Range>();
+// populate array lists of row ranges for tables...
+tableOneConfig.setRanges(tableOneRanges);
+tableTwoConfig.setRanges(tableTwoRanges);
+\end{verbatim}
+\normalsize
+
+To restrict Accumulo to a list of columns:
+
+\small
+\begin{verbatim}
+ArrayList<Pair<Text,Text>> tableOneColumns = new ArrayList<Pair<Text,Text>>();
+ArrayList<Pair<Text,Text>> tableTwoColumns = new ArrayList<Pair<Text,Text>>();
+// populate lists of columns for each of the tables ...
+tableOneConfig.fetchColumns(tableOneColumns);
+tableTwoConfig.fetchColumns(tableTwoColumns);
+\end{verbatim}
+\normalsize
+
+To set scan iterators:
+
+\small
+\begin{verbatim}
+List<IteratorSetting> tableOneIterators = new ArrayList<IteratorSetting>();
+List<IteratorSetting> tableTwoIterators = new ArrayList<IteratorSetting>();
+// populate the lists of iterator settings for each of the tables ...
+tableOneConfig.setIterators(tableOneIterators);
+tableTwoConfig.setIterators(tableTwoIterators);
+\end{verbatim}
+\normalsize
+
+
+The name of the table can be retrieved from the input split:
+
+\small
+\begin{verbatim}
+class MyMapper extends Mapper<Key,Value,WritableComparable,Writable> {
+ public void map(Key k, Value v, Context c) {
+ RangeInputSplit split = (RangeInputSplit)c.getInputSplit();
+ String tableName = split.getTableName();
+ // do something with table name
+ }
+}
+\end{verbatim}
+\normalsize
+
+
\subsection{AccumuloOutputFormat options}
\small