Posted to commits@accumulo.apache.org by md...@apache.org on 2014/04/21 23:20:01 UTC
[01/12] git commit: ACCUMULO-2702 Create separate MR Client Opts
Repository: accumulo
Updated Branches:
refs/heads/master 4e819419c -> 4dfcb9dec
ACCUMULO-2702 Create separate MR Client Opts
Separate the MR opts from the core client opts. There is plenty more
work to be done, but this is a reasonable first step in that direction.
We should probably be using a lot more composition than inheritance
overall, and this resulted in some slight code duplication, but the
change set would have been *much* more extensive otherwise.
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/f74c5c6d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/f74c5c6d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/f74c5c6d
Branch: refs/heads/master
Commit: f74c5c6da2a7532114599ac217b33072c03f05ec
Parents: 4e81941
Author: Mike Drob <md...@cloudera.com>
Authored: Mon Apr 21 13:05:18 2014 -0400
Committer: Mike Drob <md...@cloudera.com>
Committed: Mon Apr 21 17:17:48 2014 -0400
----------------------------------------------------------------------
.../accumulo/core/cli/ClientOnDefaultTable.java | 30 +++--------
.../core/cli/ClientOnRequiredTable.java | 35 ++++---------
.../apache/accumulo/core/cli/ClientOpts.java | 8 ---
.../core/cli/MapReduceClientOnDefaultTable.java | 49 ++++++++++++++++++
.../cli/MapReduceClientOnRequiredTable.java | 53 ++++++++++++++++++++
.../accumulo/core/cli/MapReduceClientOpts.java | 32 ++++++++++++
.../org/apache/accumulo/core/util/Merge.java | 10 ++--
.../core/cli/ClientOnDefaultTableTest.java | 43 ++++++++++++++++
.../accumulo/examples/simple/client/Flush.java | 2 +-
.../simple/client/RandomBatchScanner.java | 7 +--
.../simple/client/RandomBatchWriter.java | 2 +-
.../simple/client/SequentialBatchWriter.java | 2 +-
.../examples/simple/dirlist/FileCount.java | 4 +-
.../examples/simple/dirlist/QueryUtil.java | 2 +-
.../simple/filedata/CharacterHistogram.java | 4 +-
.../simple/filedata/FileDataIngest.java | 8 +--
.../helloworld/InsertWithBatchWriter.java | 6 +--
.../examples/simple/helloworld/ReadData.java | 2 +-
.../simple/isolation/InterferenceTest.java | 10 ++--
.../examples/simple/mapreduce/NGramIngest.java | 12 ++---
.../examples/simple/mapreduce/RegexExample.java | 4 +-
.../examples/simple/mapreduce/RowHash.java | 4 +-
.../examples/simple/mapreduce/TableToFile.java | 4 +-
.../simple/mapreduce/TeraSortIngest.java | 6 +--
.../simple/mapreduce/UniqueColumns.java | 10 ++--
.../examples/simple/mapreduce/WordCount.java | 4 +-
.../mapreduce/bulk/BulkIngestExample.java | 8 +--
.../simple/mapreduce/bulk/SetupTable.java | 4 +-
.../simple/mapreduce/bulk/VerifyIngest.java | 2 +-
.../accumulo/examples/simple/shard/Index.java | 2 +-
.../accumulo/examples/simple/shard/Query.java | 2 +-
.../examples/simple/dirlist/CountTest.java | 2 +-
.../accumulo/test/BulkImportDirectory.java | 2 +-
.../apache/accumulo/test/TestBinaryRows.java | 12 ++---
.../test/continuous/ContinuousIngest.java | 4 +-
.../continuous/ContinuousStatsCollector.java | 2 +-
.../test/continuous/ContinuousVerify.java | 4 +-
.../performance/scan/CollectTabletStats.java | 12 ++---
.../accumulo/test/randomwalk/bulk/Verify.java | 2 +-
.../accumulo/test/functional/BinaryIT.java | 2 +-
.../apache/accumulo/test/functional/BulkIT.java | 4 +-
.../test/functional/FateStarvationIT.java | 2 +-
.../test/functional/MasterFailoverIT.java | 2 +-
.../accumulo/test/functional/RenameIT.java | 6 +--
.../accumulo/test/functional/TableIT.java | 4 +-
.../accumulo/test/functional/WriteLotsIT.java | 4 +-
46 files changed, 286 insertions(+), 149 deletions(-)
----------------------------------------------------------------------
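For context when reading the patch below, here is a minimal, hypothetical usage sketch of the new MapReduce-aware opts classes. It is not part of this commit: the ExampleDriver class and the Hadoop job setup are illustrative assumptions; only MapReduceClientOnRequiredTable, parseArgs(), getTableName(), and setAccumuloConfigs() come from this change set.

import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class ExampleDriver {
  public static void main(String[] args) throws Exception {
    // Parses -t/--table plus the usual connection options (instance, principal, token).
    MapReduceClientOnRequiredTable opts = new MapReduceClientOnRequiredTable();
    opts.parseArgs(ExampleDriver.class.getName(), args);

    Job job = Job.getInstance(new Configuration());
    job.setJobName(ExampleDriver.class.getSimpleName());

    // setAccumuloConfigs() now lives on the MapReduce opts classes rather than
    // ClientOpts; it wires the ZooKeeper instance, connector info, scan
    // authorizations, and input/output table name into the job.
    opts.setAccumuloConfigs(job);

    // Non-MapReduce tools keep using ClientOnRequiredTable/ClientOnDefaultTable
    // and read the table through the new getTableName() accessor.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}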
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/core/src/main/java/org/apache/accumulo/core/cli/ClientOnDefaultTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/cli/ClientOnDefaultTable.java b/core/src/main/java/org/apache/accumulo/core/cli/ClientOnDefaultTable.java
index b86638a..d12f4a5 100644
--- a/core/src/main/java/org/apache/accumulo/core/cli/ClientOnDefaultTable.java
+++ b/core/src/main/java/org/apache/accumulo/core/cli/ClientOnDefaultTable.java
@@ -16,38 +16,22 @@
*/
package org.apache.accumulo.core.cli;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.hadoop.mapreduce.Job;
-
import com.beust.jcommander.Parameter;
public class ClientOnDefaultTable extends ClientOpts {
- private final String defaultTable;
-
+ @Parameter(names = "--table", description = "table to use")
+ private String tableName;
+
public ClientOnDefaultTable(String table) {
- this.defaultTable = table;
+ this.tableName = table;
}
- @Parameter(names = "--table", description = "table to use")
- public String tableName;
-
public String getTableName() {
- if (tableName == null)
- return defaultTable;
return tableName;
}
-
- @Override
- public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
- super.setAccumuloConfigs(job);
- AccumuloInputFormat.setConnectorInfo(job, principal, getToken());
- AccumuloInputFormat.setInputTableName(job, getTableName());
- AccumuloInputFormat.setScanAuthorizations(job, auths);
- AccumuloOutputFormat.setConnectorInfo(job, principal, getToken());
- AccumuloOutputFormat.setCreateTables(job, true);
- AccumuloOutputFormat.setDefaultTableName(job, getTableName());
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
}
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/core/src/main/java/org/apache/accumulo/core/cli/ClientOnRequiredTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/cli/ClientOnRequiredTable.java b/core/src/main/java/org/apache/accumulo/core/cli/ClientOnRequiredTable.java
index a451315..e6d331c 100644
--- a/core/src/main/java/org/apache/accumulo/core/cli/ClientOnRequiredTable.java
+++ b/core/src/main/java/org/apache/accumulo/core/cli/ClientOnRequiredTable.java
@@ -16,35 +16,18 @@
*/
package org.apache.accumulo.core.cli;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.hadoop.mapreduce.Job;
-
import com.beust.jcommander.Parameter;
+
public class ClientOnRequiredTable extends ClientOpts {
-
@Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
- public String tableName = null;
-
- @Parameter(names = {"-tf", "--tokenFile"}, description = "File in hdfs containing the user's authentication token create with \"bin/accumulo create-token\"")
- public String tokenFile = "";
-
- @Override
- public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
- super.setAccumuloConfigs(job);
-
- if (tokenFile.isEmpty()) {
- AccumuloInputFormat.setConnectorInfo(job, principal, getToken());
- AccumuloOutputFormat.setConnectorInfo(job, principal, getToken());
- } else {
- AccumuloInputFormat.setConnectorInfo(job, principal, tokenFile);
- AccumuloOutputFormat.setConnectorInfo(job, principal, tokenFile);
- }
- AccumuloInputFormat.setInputTableName(job, tableName);
- AccumuloInputFormat.setScanAuthorizations(job, auths);
- AccumuloOutputFormat.setCreateTables(job, true);
- AccumuloOutputFormat.setDefaultTableName(job, tableName);
+ private String tableName;
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
}
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/core/src/main/java/org/apache/accumulo/core/cli/ClientOpts.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/cli/ClientOpts.java b/core/src/main/java/org/apache/accumulo/core/cli/ClientOpts.java
index d209d3f..e582160 100644
--- a/core/src/main/java/org/apache/accumulo/core/cli/ClientOpts.java
+++ b/core/src/main/java/org/apache/accumulo/core/cli/ClientOpts.java
@@ -31,8 +31,6 @@ import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.ZooKeeperInstance;
import org.apache.accumulo.core.client.impl.thrift.SecurityErrorCode;
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.Properties;
@@ -48,7 +46,6 @@ import org.apache.accumulo.trace.instrument.Trace;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
@@ -213,11 +210,6 @@ public class ClientOpts extends Help {
return getInstance().getConnector(principal, getToken());
}
- public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
- AccumuloInputFormat.setZooKeeperInstance(job, this.getClientConfiguration());
- AccumuloOutputFormat.setZooKeeperInstance(job, this.getClientConfiguration());
- }
-
protected ClientConfiguration getClientConfiguration() throws IllegalArgumentException {
if (cachedClientConfig != null)
return cachedClientConfig;
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java b/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java
new file mode 100644
index 0000000..e7a3dd4
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.cli;
+
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.hadoop.mapreduce.Job;
+
+import com.beust.jcommander.Parameter;
+
+public class MapReduceClientOnDefaultTable extends MapReduceClientOpts {
+ @Parameter(names = "--table", description = "table to use")
+ public String tableName;
+
+ public MapReduceClientOnDefaultTable(String table) {
+ this.tableName = table;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ @Override
+ public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
+ super.setAccumuloConfigs(job);
+ AccumuloInputFormat.setConnectorInfo(job, principal, getToken());
+ AccumuloInputFormat.setInputTableName(job, getTableName());
+ AccumuloInputFormat.setScanAuthorizations(job, auths);
+ AccumuloOutputFormat.setConnectorInfo(job, principal, getToken());
+ AccumuloOutputFormat.setCreateTables(job, true);
+ AccumuloOutputFormat.setDefaultTableName(job, getTableName());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java b/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java
new file mode 100644
index 0000000..abfc17d
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.cli;
+
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.hadoop.mapreduce.Job;
+
+import com.beust.jcommander.Parameter;
+
+public class MapReduceClientOnRequiredTable extends MapReduceClientOpts {
+ @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
+ private String tableName;
+
+ @Parameter(names = {"-tf", "--tokenFile"}, description = "File in hdfs containing the user's authentication token create with \"bin/accumulo create-token\"")
+ private String tokenFile = "";
+
+ @Override
+ public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
+ super.setAccumuloConfigs(job);
+
+ if (tokenFile.isEmpty()) {
+ AccumuloInputFormat.setConnectorInfo(job, principal, getToken());
+ AccumuloOutputFormat.setConnectorInfo(job, principal, getToken());
+ } else {
+ AccumuloInputFormat.setConnectorInfo(job, principal, tokenFile);
+ AccumuloOutputFormat.setConnectorInfo(job, principal, tokenFile);
+ }
+ AccumuloInputFormat.setInputTableName(job, getTableName());
+ AccumuloInputFormat.setScanAuthorizations(job, auths);
+ AccumuloOutputFormat.setCreateTables(job, true);
+ AccumuloOutputFormat.setDefaultTableName(job, getTableName());
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java b/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java
new file mode 100644
index 0000000..4b3b7ed
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.cli;
+
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * Adds some MR awareness to the ClientOpts
+ */
+public class MapReduceClientOpts extends ClientOpts {
+ public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
+ AccumuloInputFormat.setZooKeeperInstance(job, this.getClientConfiguration());
+ AccumuloOutputFormat.setZooKeeperInstance(job, this.getClientConfiguration());
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/core/src/main/java/org/apache/accumulo/core/util/Merge.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/util/Merge.java b/core/src/main/java/org/apache/accumulo/core/util/Merge.java
index 4c0a3ea..8049cd7 100644
--- a/core/src/main/java/org/apache/accumulo/core/util/Merge.java
+++ b/core/src/main/java/org/apache/accumulo/core/util/Merge.java
@@ -91,17 +91,17 @@ public class Merge {
try {
Connector conn = opts.getConnector();
- if (!conn.tableOperations().exists(opts.tableName)) {
- System.err.println("table " + opts.tableName + " does not exist");
+ if (!conn.tableOperations().exists(opts.getTableName())) {
+ System.err.println("table " + opts.getTableName() + " does not exist");
return;
}
if (opts.goalSize == null || opts.goalSize < 1) {
- AccumuloConfiguration tableConfig = new ConfigurationCopy(conn.tableOperations().getProperties(opts.tableName));
+ AccumuloConfiguration tableConfig = new ConfigurationCopy(conn.tableOperations().getProperties(opts.getTableName()));
opts.goalSize = tableConfig.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD);
}
- message("Merging tablets in table %s to %d bytes", opts.tableName, opts.goalSize);
- mergomatic(conn, opts.tableName, opts.begin, opts.end, opts.goalSize, opts.force);
+ message("Merging tablets in table %s to %d bytes", opts.getTableName(), opts.goalSize);
+ mergomatic(conn, opts.getTableName(), opts.begin, opts.end, opts.goalSize, opts.force);
} catch (Exception ex) {
throw new MergeException(ex);
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/core/src/test/java/org/apache/accumulo/core/cli/ClientOnDefaultTableTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/cli/ClientOnDefaultTableTest.java b/core/src/test/java/org/apache/accumulo/core/cli/ClientOnDefaultTableTest.java
new file mode 100644
index 0000000..78c0f1e
--- /dev/null
+++ b/core/src/test/java/org/apache/accumulo/core/cli/ClientOnDefaultTableTest.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.cli;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Before;
+import org.junit.Test;
+
+public class ClientOnDefaultTableTest {
+
+ ClientOnDefaultTable client;
+
+ @Before
+ public void setUp() {
+ client = new ClientOnDefaultTable("test");
+ }
+
+ @Test
+ public void testDefaultTable() {
+ assertEquals("test", client.getTableName());
+ }
+
+ @Test
+ public void testNonDefaultTable() {
+ client.parseArgs("program", new String[] {"--table", "other"});
+ assertEquals("other", client.getTableName());
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/Flush.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/Flush.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/Flush.java
index 225fac2..893ed3f 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/Flush.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/Flush.java
@@ -29,7 +29,7 @@ public class Flush {
opts.parseArgs(Flush.class.getName(), args);
try {
Connector connector = opts.getConnector();
- connector.tableOperations().flush(opts.tableName, null, null, true);
+ connector.tableOperations().flush(opts.getTableName(), null, null, true);
} catch (Exception e) {
throw new RuntimeException(e);
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/RandomBatchScanner.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/RandomBatchScanner.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/RandomBatchScanner.java
index 5fe23a5..4aa8269 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/RandomBatchScanner.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/RandomBatchScanner.java
@@ -16,12 +16,14 @@
*/
package org.apache.accumulo.examples.simple.client;
+import static org.apache.accumulo.examples.simple.client.RandomBatchWriter.abs;
+
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map.Entry;
-import java.util.concurrent.TimeUnit;
import java.util.Random;
+import java.util.concurrent.TimeUnit;
import org.apache.accumulo.core.cli.BatchScannerOpts;
import org.apache.accumulo.core.cli.ClientOnRequiredTable;
@@ -35,7 +37,6 @@ import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;
-import static org.apache.accumulo.examples.simple.client.RandomBatchWriter.abs;
import com.beust.jcommander.Parameter;
@@ -198,7 +199,7 @@ public class RandomBatchScanner {
opts.parseArgs(RandomBatchScanner.class.getName(), args, bsOpts);
Connector connector = opts.getConnector();
- BatchScanner batchReader = connector.createBatchScanner(opts.tableName, opts.auths, bsOpts.scanThreads);
+ BatchScanner batchReader = connector.createBatchScanner(opts.getTableName(), opts.auths, bsOpts.scanThreads);
batchReader.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);
Random r;
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/RandomBatchWriter.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/RandomBatchWriter.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/RandomBatchWriter.java
index 44947d1..50c5d06 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/RandomBatchWriter.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/RandomBatchWriter.java
@@ -139,7 +139,7 @@ public class RandomBatchWriter {
r = new Random(opts.seed);
}
Connector connector = opts.getConnector();
- BatchWriter bw = connector.createBatchWriter(opts.tableName, bwOpts.getBatchWriterConfig());
+ BatchWriter bw = connector.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
// reuse the ColumnVisibility object to improve performance
ColumnVisibility cv = opts.visiblity;
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/SequentialBatchWriter.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/SequentialBatchWriter.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/SequentialBatchWriter.java
index c37c1c3..3ae21e9 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/SequentialBatchWriter.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/client/SequentialBatchWriter.java
@@ -54,7 +54,7 @@ public class SequentialBatchWriter {
BatchWriterOpts bwOpts = new BatchWriterOpts();
opts.parseArgs(SequentialBatchWriter.class.getName(), args, bwOpts);
Connector connector = opts.getConnector();
- BatchWriter bw = connector.createBatchWriter(opts.tableName, bwOpts.getBatchWriterConfig());
+ BatchWriter bw = connector.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
long end = opts.start + opts.num;
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/dirlist/FileCount.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/dirlist/FileCount.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/dirlist/FileCount.java
index a810b38..cb6d350 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/dirlist/FileCount.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/dirlist/FileCount.java
@@ -246,9 +246,9 @@ public class FileCount {
inserts = 0;
Connector conn = opts.getConnector();
- Scanner scanner = conn.createScanner(opts.tableName, opts.auths);
+ Scanner scanner = conn.createScanner(opts.getTableName(), opts.auths);
scanner.setBatchSize(scanOpts.scanBatchSize);
- BatchWriter bw = conn.createBatchWriter(opts.tableName, bwOpts.getBatchWriterConfig());
+ BatchWriter bw = conn.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
long t1 = System.currentTimeMillis();
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/dirlist/QueryUtil.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/dirlist/QueryUtil.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/dirlist/QueryUtil.java
index 7795ec3..09fb40c 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/dirlist/QueryUtil.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/dirlist/QueryUtil.java
@@ -53,7 +53,7 @@ public class QueryUtil {
public QueryUtil(Opts opts) throws AccumuloException,
AccumuloSecurityException {
conn = opts.getConnector();
- this.tableName = opts.tableName;
+ this.tableName = opts.getTableName();
this.auths = opts.auths;
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/filedata/CharacterHistogram.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/filedata/CharacterHistogram.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/filedata/CharacterHistogram.java
index d0662b6..c760ee3 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/filedata/CharacterHistogram.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/filedata/CharacterHistogram.java
@@ -22,7 +22,7 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map.Entry;
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
@@ -76,7 +76,7 @@ public class CharacterHistogram extends Configured implements Tool {
}
}
- static class Opts extends ClientOnRequiredTable {
+ static class Opts extends MapReduceClientOnRequiredTable {
@Parameter(names = "--vis")
String visibilities = "";
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/filedata/FileDataIngest.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/filedata/FileDataIngest.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/filedata/FileDataIngest.java
index 78fef0d..52ea0bd 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/filedata/FileDataIngest.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/filedata/FileDataIngest.java
@@ -188,11 +188,11 @@ public class FileDataIngest {
opts.parseArgs(FileDataIngest.class.getName(), args, bwOpts);
Connector conn = opts.getConnector();
- if (!conn.tableOperations().exists(opts.tableName)) {
- conn.tableOperations().create(opts.tableName);
- conn.tableOperations().attachIterator(opts.tableName, new IteratorSetting(1, ChunkCombiner.class));
+ if (!conn.tableOperations().exists(opts.getTableName())) {
+ conn.tableOperations().create(opts.getTableName());
+ conn.tableOperations().attachIterator(opts.getTableName(), new IteratorSetting(1, ChunkCombiner.class));
}
- BatchWriter bw = conn.createBatchWriter(opts.tableName, bwOpts.getBatchWriterConfig());
+ BatchWriter bw = conn.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
FileDataIngest fdi = new FileDataIngest(opts.chunkSize, opts.visibility);
for (String filename : opts.files) {
fdi.insertFileData(filename, bw);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java
index b980133..74d8548 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java
@@ -44,9 +44,9 @@ public class InsertWithBatchWriter {
Connector connector = opts.getConnector();
MultiTableBatchWriter mtbw = connector.createMultiTableBatchWriter(bwOpts.getBatchWriterConfig());
- if (!connector.tableOperations().exists(opts.tableName))
- connector.tableOperations().create(opts.tableName);
- BatchWriter bw = mtbw.getBatchWriter(opts.tableName);
+ if (!connector.tableOperations().exists(opts.getTableName()))
+ connector.tableOperations().create(opts.getTableName());
+ BatchWriter bw = mtbw.getBatchWriter(opts.getTableName());
Text colf = new Text("colfam");
System.out.println("writing ...");
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java
index 97bb633..4eaa31f 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java
@@ -55,7 +55,7 @@ public class ReadData {
Connector connector = opts.getConnector();
- Scanner scan = connector.createScanner(opts.tableName, opts.auths);
+ Scanner scan = connector.createScanner(opts.getTableName(), opts.auths);
scan.setBatchSize(scanOpts.scanBatchSize);
Key start = null;
if (opts.startKey != null)
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java
index a90848e..db8c139 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java
@@ -160,16 +160,16 @@ public class InterferenceTest {
opts.iterations = Long.MAX_VALUE;
Connector conn = opts.getConnector();
- if (!conn.tableOperations().exists(opts.tableName))
- conn.tableOperations().create(opts.tableName);
+ if (!conn.tableOperations().exists(opts.getTableName()))
+ conn.tableOperations().create(opts.getTableName());
- Thread writer = new Thread(new Writer(conn.createBatchWriter(opts.tableName, bwOpts.getBatchWriterConfig()), opts.iterations));
+ Thread writer = new Thread(new Writer(conn.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig()), opts.iterations));
writer.start();
Reader r;
if (opts.isolated)
- r = new Reader(new IsolatedScanner(conn.createScanner(opts.tableName, opts.auths)));
+ r = new Reader(new IsolatedScanner(conn.createScanner(opts.getTableName(), opts.auths)));
else
- r = new Reader(conn.createScanner(opts.tableName, opts.auths));
+ r = new Reader(conn.createScanner(opts.getTableName(), opts.auths));
Thread reader;
reader = new Thread(r);
reader.start();
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/NGramIngest.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/NGramIngest.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/NGramIngest.java
index f8d0b0b..6ae74ce 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/NGramIngest.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/NGramIngest.java
@@ -20,7 +20,7 @@ import java.io.IOException;
import java.util.SortedSet;
import java.util.TreeSet;
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
@@ -45,7 +45,7 @@ public class NGramIngest extends Configured implements Tool {
private static final Logger log = Logger.getLogger(NGramIngest.class);
- static class Opts extends ClientOnRequiredTable {
+ static class Opts extends MapReduceClientOnRequiredTable {
@Parameter(names = "--input", required = true)
String inputDirectory;
}
@@ -83,9 +83,9 @@ public class NGramIngest extends Configured implements Tool {
job.setNumReduceTasks(0);
job.setSpeculativeExecution(false);
- if (!opts.getConnector().tableOperations().exists(opts.tableName)) {
- log.info("Creating table " + opts.tableName);
- opts.getConnector().tableOperations().create(opts.tableName);
+ if (!opts.getConnector().tableOperations().exists(opts.getTableName())) {
+ log.info("Creating table " + opts.getTableName());
+ opts.getConnector().tableOperations().create(opts.getTableName());
SortedSet<Text> splits = new TreeSet<Text>();
String numbers[] = "1 2 3 4 5 6 7 8 9".split("\\s");
String lower[] = "a b c d e f g h i j k l m n o p q r s t u v w x y z".split("\\s");
@@ -95,7 +95,7 @@ public class NGramIngest extends Configured implements Tool {
splits.add(new Text(s));
}
}
- opts.getConnector().tableOperations().addSplits(opts.tableName, splits);
+ opts.getConnector().tableOperations().addSplits(opts.getTableName(), splits);
}
TextInputFormat.addInputPath(job, new Path(opts.inputDirectory));
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java
index 47e5879..d6695e6 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java
@@ -18,7 +18,7 @@ package org.apache.accumulo.examples.simple.mapreduce;
import java.io.IOException;
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.data.Key;
@@ -43,7 +43,7 @@ public class RegexExample extends Configured implements Tool {
}
}
- static class Opts extends ClientOnRequiredTable {
+ static class Opts extends MapReduceClientOnRequiredTable {
@Parameter(names = "--rowRegex")
String rowRegex;
@Parameter(names = "--columnFamilyRegex")
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
index 1fa9b8f..904713d 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
@@ -19,7 +19,7 @@ package org.apache.accumulo.examples.simple.mapreduce;
import java.io.IOException;
import java.util.Collections;
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.data.Key;
@@ -55,7 +55,7 @@ public class RowHash extends Configured implements Tool {
public void setup(Context job) {}
}
- private static class Opts extends ClientOnRequiredTable {
+ private static class Opts extends MapReduceClientOnRequiredTable {
@Parameter(names = "--column", required = true)
String column = null;
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
index 094e6c1..351a51c 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
@@ -21,7 +21,7 @@ import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.HashSet;
import java.util.Map;
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.data.Key;
@@ -47,7 +47,7 @@ import com.beust.jcommander.Parameter;
*/
public class TableToFile extends Configured implements Tool {
- static class Opts extends ClientOnRequiredTable {
+ static class Opts extends MapReduceClientOnRequiredTable {
@Parameter(names = "--output", description = "output directory", required = true)
String output;
@Parameter(names = "--columns", description = "columns to extract, in cf:cq{,cf:cq,...} form")
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
index f9f2d39..33a9fa1 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
@@ -25,7 +25,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Random;
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.data.Mutation;
@@ -351,7 +351,7 @@ public class TeraSortIngest extends Configured implements Tool {
ToolRunner.run(new Configuration(), new TeraSortIngest(), args);
}
- static class Opts extends ClientOnRequiredTable {
+ static class Opts extends MapReduceClientOnRequiredTable {
@Parameter(names = "--count", description = "number of rows to ingest", required = true)
long numRows;
@Parameter(names = {"-nk", "--minKeySize"}, description = "miniumum key size", required = true)
@@ -392,7 +392,7 @@ public class TeraSortIngest extends Configured implements Tool {
conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
- conf.set("cloudgen.tablename", opts.tableName);
+ conf.set("cloudgen.tablename", opts.getTableName());
if (args.length > 10)
conf.setInt(NUMSPLITS, opts.splits);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/UniqueColumns.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/UniqueColumns.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/UniqueColumns.java
index e0e29ce..7c0b91a 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/UniqueColumns.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/UniqueColumns.java
@@ -20,7 +20,7 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.data.ByteSequence;
@@ -73,7 +73,7 @@ public class UniqueColumns extends Configured implements Tool {
}
}
- static class Opts extends ClientOnRequiredTable {
+ static class Opts extends MapReduceClientOnRequiredTable {
@Parameter(names = "--output", description = "output directory")
String output;
@Parameter(names = "--reducers", description = "number of reducers to use", required = true)
@@ -93,7 +93,7 @@ public class UniqueColumns extends Configured implements Tool {
job.setJobName(jobName);
job.setJarByClass(this.getClass());
- String clone = opts.tableName;
+ String clone = opts.getTableName();
Connector conn = null;
opts.setAccumuloConfigs(job);
@@ -105,8 +105,8 @@ public class UniqueColumns extends Configured implements Tool {
*/
conn = opts.getConnector();
- clone = opts.tableName + "_" + jobName;
- conn.tableOperations().clone(opts.tableName, clone, true, new HashMap<String,String>(), new HashSet<String>());
+ clone = opts.getTableName() + "_" + jobName;
+ conn.tableOperations().clone(opts.getTableName(), clone, true, new HashMap<String,String>(), new HashSet<String>());
conn.tableOperations().offline(clone);
AccumuloInputFormat.setOfflineTableScan(job, true);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
index 220b85c..f114a24 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
@@ -18,7 +18,7 @@ package org.apache.accumulo.examples.simple.mapreduce;
import java.io.IOException;
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
@@ -41,7 +41,7 @@ import com.beust.jcommander.Parameter;
*/
public class WordCount extends Configured implements Tool {
- static class Opts extends ClientOnRequiredTable {
+ static class Opts extends MapReduceClientOnRequiredTable {
@Parameter(names = "--input", description = "input directory")
String inputDirectory;
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
index 72bd7eb..1a43ec2 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import java.io.PrintStream;
import java.util.Collection;
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
import org.apache.accumulo.core.client.mapreduce.lib.partition.RangePartitioner;
@@ -94,7 +94,7 @@ public class BulkIngestExample extends Configured implements Tool {
}
}
- static class Opts extends ClientOnRequiredTable {
+ static class Opts extends MapReduceClientOnRequiredTable {
@Parameter(names = "--inputDir", required = true)
String inputDir;
@Parameter(names = "--workDir", required = true)
@@ -131,7 +131,7 @@ public class BulkIngestExample extends Configured implements Tool {
FileSystem fs = FileSystem.get(conf);
out = new PrintStream(new BufferedOutputStream(fs.create(new Path(opts.workDir + "/splits.txt"))));
- Collection<Text> splits = connector.tableOperations().listSplits(opts.tableName, 100);
+ Collection<Text> splits = connector.tableOperations().listSplits(opts.getTableName(), 100);
for (Text split : splits)
out.println(new String(Base64.encodeBase64(TextUtil.getBytes(split))));
@@ -145,7 +145,7 @@ public class BulkIngestExample extends Configured implements Tool {
Path failures = new Path(opts.workDir, "failures");
fs.delete(failures, true);
fs.mkdirs(new Path(opts.workDir, "failures"));
- connector.tableOperations().importDirectory(opts.tableName, opts.workDir + "/files", opts.workDir + "/failures", false);
+ connector.tableOperations().importDirectory(opts.getTableName(), opts.workDir + "/files", opts.workDir + "/failures", false);
} catch (Exception e) {
throw new RuntimeException(e);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
index b6d3d13..ac96e9d 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
@@ -37,14 +37,14 @@ public class SetupTable {
Opts opts = new Opts();
opts.parseArgs(SetupTable.class.getName(), args);
Connector conn = opts.getConnector();
- conn.tableOperations().create(opts.tableName);
+ conn.tableOperations().create(opts.getTableName());
if (!opts.splits.isEmpty()) {
// create a table with initial partitions
TreeSet<Text> intialPartitions = new TreeSet<Text>();
for (String split : opts.splits) {
intialPartitions.add(new Text(split));
}
- conn.tableOperations().addSplits(opts.tableName, intialPartitions);
+ conn.tableOperations().addSplits(opts.getTableName(), intialPartitions);
}
}
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
index 4366416..61d3f7e 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
@@ -48,7 +48,7 @@ public class VerifyIngest {
opts.parseArgs(VerifyIngest.class.getName(), args);
Connector connector = opts.getConnector();
- Scanner scanner = connector.createScanner(opts.tableName, opts.auths);
+ Scanner scanner = connector.createScanner(opts.getTableName(), opts.auths);
scanner.setRange(new Range(new Text(String.format("row_%08d", opts.startRow)), null));
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java
index 4731061..accb3a0 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java
@@ -105,7 +105,7 @@ public class Index {
String splitRegex = "\\W+";
- BatchWriter bw = opts.getConnector().createBatchWriter(opts.tableName, bwOpts.getBatchWriterConfig());
+ BatchWriter bw = opts.getConnector().createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
for (String filename : opts.files) {
index(opts.partitions, new File(filename), splitRegex, bw);
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
index aa12c71..b0502a7 100644
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
@@ -71,7 +71,7 @@ public class Query {
BatchScannerOpts bsOpts = new BatchScannerOpts();
opts.parseArgs(Query.class.getName(), args, bsOpts);
Connector conn = opts.getConnector();
- BatchScanner bs = conn.createBatchScanner(opts.tableName, opts.auths, bsOpts.scanThreads);
+ BatchScanner bs = conn.createBatchScanner(opts.getTableName(), opts.auths, bsOpts.scanThreads);
bs.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);
for (String entry : query(bs, opts.terms))
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/examples/simple/src/test/java/org/apache/accumulo/examples/simple/dirlist/CountTest.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/test/java/org/apache/accumulo/examples/simple/dirlist/CountTest.java b/examples/simple/src/test/java/org/apache/accumulo/examples/simple/dirlist/CountTest.java
index b32918e..f5f5d02 100644
--- a/examples/simple/src/test/java/org/apache/accumulo/examples/simple/dirlist/CountTest.java
+++ b/examples/simple/src/test/java/org/apache/accumulo/examples/simple/dirlist/CountTest.java
@@ -70,7 +70,7 @@ public class CountTest extends TestCase {
ScannerOpts scanOpts = new ScannerOpts();
BatchWriterOpts bwOpts = new BatchWriterOpts();
opts.instance = "counttest";
- opts.tableName = "dirlisttable";
+ opts.setTableName("dirlisttable");
opts.password = new Password("secret");
opts.mock = true;
opts.password = new Opts.Password("");
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/main/java/org/apache/accumulo/test/BulkImportDirectory.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/BulkImportDirectory.java b/test/src/main/java/org/apache/accumulo/test/BulkImportDirectory.java
index 42a8041..93c2e98 100644
--- a/test/src/main/java/org/apache/accumulo/test/BulkImportDirectory.java
+++ b/test/src/main/java/org/apache/accumulo/test/BulkImportDirectory.java
@@ -62,7 +62,7 @@ public class BulkImportDirectory {
opts.parseArgs(BulkImportDirectory.class.getName(), args);
fs.delete(new Path(opts.failures), true);
fs.mkdirs(new Path(opts.failures));
- opts.getConnector().tableOperations().importDirectory(opts.tableName, opts.source, opts.failures, false);
+ opts.getConnector().tableOperations().importDirectory(opts.getTableName(), opts.source, opts.failures, false);
}
}
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/main/java/org/apache/accumulo/test/TestBinaryRows.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/TestBinaryRows.java b/test/src/main/java/org/apache/accumulo/test/TestBinaryRows.java
index c2af60c..a5ac0e8 100644
--- a/test/src/main/java/org/apache/accumulo/test/TestBinaryRows.java
+++ b/test/src/main/java/org/apache/accumulo/test/TestBinaryRows.java
@@ -85,7 +85,7 @@ public class TestBinaryRows {
final Text CF = new Text("cf"), CQ = new Text("cq");
final byte[] CF_BYTES = "cf".getBytes(StandardCharsets.UTF_8), CQ_BYTES = "cq".getBytes(StandardCharsets.UTF_8);
if (opts.mode.equals("ingest") || opts.mode.equals("delete")) {
- BatchWriter bw = connector.createBatchWriter(opts.tableName, bwOpts.getBatchWriterConfig());
+ BatchWriter bw = connector.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
boolean delete = opts.mode.equals("delete");
for (long i = 0; i < opts.num; i++) {
@@ -103,7 +103,7 @@ public class TestBinaryRows {
bw.close();
} else if (opts.mode.equals("verifyDeleted")) {
- Scanner s = connector.createScanner(opts.tableName, opts.auths);
+ Scanner s = connector.createScanner(opts.getTableName(), opts.auths);
s.setBatchSize(scanOpts.scanBatchSize);
Key startKey = new Key(encodeLong(opts.start), CF_BYTES, CQ_BYTES, new byte[0], Long.MAX_VALUE);
Key stopKey = new Key(encodeLong(opts.start + opts.num - 1), CF_BYTES, CQ_BYTES, new byte[0], 0);
@@ -117,7 +117,7 @@ public class TestBinaryRows {
} else if (opts.mode.equals("verify")) {
long t1 = System.currentTimeMillis();
- Scanner s = connector.createScanner(opts.tableName, opts.auths);
+ Scanner s = connector.createScanner(opts.getTableName(), opts.auths);
Key startKey = new Key(encodeLong(opts.start), CF_BYTES, CQ_BYTES, new byte[0], Long.MAX_VALUE);
Key stopKey = new Key(encodeLong(opts.start + opts.num - 1), CF_BYTES, CQ_BYTES, new byte[0], 0);
s.setBatchSize(scanOpts.scanBatchSize);
@@ -153,7 +153,7 @@ public class TestBinaryRows {
for (int i = 0; i < numLookups; i++) {
long row = ((r.nextLong() & 0x7fffffffffffffffl) % opts.num) + opts.start;
- Scanner s = connector.createScanner(opts.tableName, opts.auths);
+ Scanner s = connector.createScanner(opts.getTableName(), opts.auths);
s.setBatchSize(scanOpts.scanBatchSize);
Key startKey = new Key(encodeLong(row), CF_BYTES, CQ_BYTES, new byte[0], Long.MAX_VALUE);
Key stopKey = new Key(encodeLong(row), CF_BYTES, CQ_BYTES, new byte[0], 0);
@@ -195,8 +195,8 @@ public class TestBinaryRows {
System.out.printf("added split point 0x%016x %,12d%n", splitPoint, splitPoint);
}
- connector.tableOperations().create(opts.tableName);
- connector.tableOperations().addSplits(opts.tableName, splits);
+ connector.tableOperations().create(opts.getTableName());
+ connector.tableOperations().addSplits(opts.getTableName(), splits);
} else {
throw new Exception("ERROR : " + opts.mode + " is not a valid operation.");
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousIngest.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousIngest.java b/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousIngest.java
index 2aca579..d6a16df 100644
--- a/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousIngest.java
+++ b/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousIngest.java
@@ -30,7 +30,7 @@ import java.util.zip.Checksum;
import org.apache.accumulo.core.Constants;
import org.apache.accumulo.core.cli.BatchWriterOpts;
-import org.apache.accumulo.core.cli.ClientOnDefaultTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnDefaultTable;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MutationsRejectedException;
@@ -56,7 +56,7 @@ import com.beust.jcommander.Parameter;
public class ContinuousIngest {
- static public class BaseOpts extends ClientOnDefaultTable {
+ static public class BaseOpts extends MapReduceClientOnDefaultTable {
public class DebugConverter implements IStringConverter<String> {
@Override
public String convert(String debugLog) {
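ContinuousIngest's BaseOpts now extends the new MapReduceClientOnDefaultTable, which is added elsewhere in this commit and not quoted here. Purely as a hypothetical sketch of the shape such a default-table MapReduce opts class could take (the class name, constructor, and method bodies below are assumptions, not the committed code):

import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.hadoop.mapreduce.Job;

import com.beust.jcommander.Parameter;

// Hypothetical sketch, not the file added by this commit: a default-table
// variant of the opts that can also wire itself into a Hadoop Job.
public class DefaultTableMapReduceOptsSketch extends MapReduceClientOpts {
  @Parameter(names = "--table", description = "table to use")
  private String tableName;

  public DefaultTableMapReduceOptsSketch(String defaultTable) {
    this.tableName = defaultTable;
  }

  public String getTableName() {
    return tableName;
  }

  public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
    // principal and getToken() come from the ClientOpts base class.
    AccumuloInputFormat.setConnectorInfo(job, principal, getToken());
    AccumuloInputFormat.setInputTableName(job, getTableName());
    AccumuloOutputFormat.setConnectorInfo(job, principal, getToken());
    AccumuloOutputFormat.setDefaultTableName(job, getTableName());
  }
}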
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousStatsCollector.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousStatsCollector.java b/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousStatsCollector.java
index f8cb0a8..458afe0 100644
--- a/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousStatsCollector.java
+++ b/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousStatsCollector.java
@@ -65,7 +65,7 @@ public class ContinuousStatsCollector {
public StatsCollectionTask(Opts opts, int scanBatchSize) {
this.opts = opts;
this.scanBatchSize = scanBatchSize;
- this.tableId = Tables.getNameToIdMap(opts.getInstance()).get(opts.tableName);
+ this.tableId = Tables.getNameToIdMap(opts.getInstance()).get(opts.getTableName());
System.out
.println("TIME TABLET_SERVERS TOTAL_ENTRIES TOTAL_INGEST TOTAL_QUERY TABLE_RECS TABLE_RECS_IN_MEM TABLE_INGEST TABLE_QUERY TABLE_TABLETS TABLE_TABLETS_ONLINE"
+ " ACCUMULO_DU ACCUMULO_DIRS ACCUMULO_FILES TABLE_DU TABLE_DIRS TABLE_FILES"
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousVerify.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousVerify.java b/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousVerify.java
index d56ccde..cd8e021 100644
--- a/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousVerify.java
+++ b/test/src/main/java/org/apache/accumulo/test/continuous/ContinuousVerify.java
@@ -25,7 +25,7 @@ import java.util.HashSet;
import java.util.Random;
import java.util.Set;
-import org.apache.accumulo.core.cli.ClientOnDefaultTable;
+import org.apache.accumulo.core.cli.MapReduceClientOnDefaultTable;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.data.Key;
@@ -158,7 +158,7 @@ public class ContinuousVerify extends Configured implements Tool {
}
}
- static class Opts extends ClientOnDefaultTable {
+ static class Opts extends MapReduceClientOnDefaultTable {
@Parameter(names = "--output", description = "location in HDFS to store the results; must not exist", required = true)
String outputDir = "/tmp/continuousVerify";
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/main/java/org/apache/accumulo/test/performance/scan/CollectTabletStats.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/performance/scan/CollectTabletStats.java b/test/src/main/java/org/apache/accumulo/test/performance/scan/CollectTabletStats.java
index d2c8d0f..2195483 100644
--- a/test/src/main/java/org/apache/accumulo/test/performance/scan/CollectTabletStats.java
+++ b/test/src/main/java/org/apache/accumulo/test/performance/scan/CollectTabletStats.java
@@ -109,14 +109,14 @@ public class CollectTabletStats {
Instance instance = opts.getInstance();
final ServerConfiguration sconf = new ServerConfiguration(instance);
- String tableId = Tables.getNameToIdMap(instance).get(opts.tableName);
+ String tableId = Tables.getNameToIdMap(instance).get(opts.getTableName());
if (tableId == null) {
- log.error("Unable to find table named " + opts.tableName);
+ log.error("Unable to find table named " + opts.getTableName());
System.exit(-1);
}
TreeMap<KeyExtent,String> tabletLocations = new TreeMap<KeyExtent,String>();
- List<KeyExtent> candidates = findTablets(!opts.selectFarTablets, new Credentials(opts.principal, opts.getToken()), opts.tableName, instance,
+ List<KeyExtent> candidates = findTablets(!opts.selectFarTablets, new Credentials(opts.principal, opts.getToken()), opts.getTableName(), instance,
tabletLocations);
if (candidates.size() < opts.numThreads) {
@@ -136,7 +136,7 @@ public class CollectTabletStats {
System.out.println();
System.out.println("run location : " + InetAddress.getLocalHost().getHostName() + "/" + InetAddress.getLocalHost().getHostAddress());
System.out.println("num threads : " + opts.numThreads);
- System.out.println("table : " + opts.tableName);
+ System.out.println("table : " + opts.getTableName());
System.out.println("table id : " + tableId);
for (KeyExtent ke : tabletsToTest) {
@@ -217,7 +217,7 @@ public class CollectTabletStats {
Test test = new Test(ke) {
@Override
public int runTest() throws Exception {
- return scanTablet(conn, opts.tableName, opts.auths, scanOpts.scanBatchSize, ke.getPrevEndRow(), ke.getEndRow(), columns);
+ return scanTablet(conn, opts.getTableName(), opts.auths, scanOpts.scanBatchSize, ke.getPrevEndRow(), ke.getEndRow(), columns);
}
};
@@ -234,7 +234,7 @@ public class CollectTabletStats {
@Override
public void run() {
try {
- calcTabletStats(conn, opts.tableName, opts.auths, scanOpts.scanBatchSize, ke, columns);
+ calcTabletStats(conn, opts.getTableName(), opts.auths, scanOpts.scanBatchSize, ke, columns);
} catch (Exception e) {
e.printStackTrace();
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/main/java/org/apache/accumulo/test/randomwalk/bulk/Verify.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/accumulo/test/randomwalk/bulk/Verify.java b/test/src/main/java/org/apache/accumulo/test/randomwalk/bulk/Verify.java
index c36e4a0..852fe37 100644
--- a/test/src/main/java/org/apache/accumulo/test/randomwalk/bulk/Verify.java
+++ b/test/src/main/java/org/apache/accumulo/test/randomwalk/bulk/Verify.java
@@ -105,7 +105,7 @@ public class Verify extends Test {
public static void main(String args[]) throws Exception {
ClientOnRequiredTable opts = new ClientOnRequiredTable();
opts.parseArgs(Verify.class.getName(), args);
- Scanner scanner = opts.getConnector().createScanner(opts.tableName, opts.auths);
+ Scanner scanner = opts.getConnector().createScanner(opts.getTableName(), opts.auths);
scanner.fetchColumnFamily(BulkPlusOne.CHECK_COLUMN_FAMILY);
Text startBadRow = null;
Text lastBadRow = null;
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/test/java/org/apache/accumulo/test/functional/BinaryIT.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/accumulo/test/functional/BinaryIT.java b/test/src/test/java/org/apache/accumulo/test/functional/BinaryIT.java
index e6c73c2..ef9034b 100644
--- a/test/src/test/java/org/apache/accumulo/test/functional/BinaryIT.java
+++ b/test/src/test/java/org/apache/accumulo/test/functional/BinaryIT.java
@@ -55,7 +55,7 @@ public class BinaryIT extends ConfigurableMacIT {
BatchWriterOpts bwOpts = new BatchWriterOpts();
ScannerOpts scanOpts = new ScannerOpts();
TestBinaryRows.Opts opts = new TestBinaryRows.Opts();
- opts.tableName = "bt";
+ opts.setTableName("bt");
opts.start = 0;
opts.num = 100000;
opts.mode = "ingest";
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/test/java/org/apache/accumulo/test/functional/BulkIT.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/accumulo/test/functional/BulkIT.java b/test/src/test/java/org/apache/accumulo/test/functional/BulkIT.java
index 831dcd4..9520884 100644
--- a/test/src/test/java/org/apache/accumulo/test/functional/BulkIT.java
+++ b/test/src/test/java/org/apache/accumulo/test/functional/BulkIT.java
@@ -62,7 +62,7 @@ public class BulkIT extends SimpleMacIT {
opts.rows = N;
opts.instance = c.getInstance().getInstanceName();
opts.cols = 1;
- opts.tableName = tableName;
+ opts.setTableName(tableName);
String fileFormat = "/testrf/"+filePrefix+"rf%02d";
for (int i = 0; i < COUNT; i++) {
opts.outputFile = base + String.format(fileFormat, i);
@@ -76,7 +76,7 @@ public class BulkIT extends SimpleMacIT {
TestIngest.ingest(c, opts, BWOPTS);
c.tableOperations().importDirectory(tableName, base + "/testrf", bulkFailures, false);
VerifyIngest.Opts vopts = new VerifyIngest.Opts();
- vopts.tableName = tableName;
+ vopts.setTableName(tableName);
vopts.random = 56;
for (int i = 0; i < COUNT; i++) {
vopts.startRow = i * N;
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/test/java/org/apache/accumulo/test/functional/FateStarvationIT.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/accumulo/test/functional/FateStarvationIT.java b/test/src/test/java/org/apache/accumulo/test/functional/FateStarvationIT.java
index 4d75a16..69e4595 100644
--- a/test/src/test/java/org/apache/accumulo/test/functional/FateStarvationIT.java
+++ b/test/src/test/java/org/apache/accumulo/test/functional/FateStarvationIT.java
@@ -50,7 +50,7 @@ public class FateStarvationIT extends SimpleMacIT {
opts.dataSize = 50;
opts.rows = 100000;
opts.cols = 1;
- opts.tableName = tableName;
+ opts.setTableName(tableName);
TestIngest.ingest(c, opts, new BatchWriterOpts());
c.tableOperations().flush(tableName, null, null, true);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/test/java/org/apache/accumulo/test/functional/MasterFailoverIT.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/accumulo/test/functional/MasterFailoverIT.java b/test/src/test/java/org/apache/accumulo/test/functional/MasterFailoverIT.java
index 218d65e..4fd8b70 100644
--- a/test/src/test/java/org/apache/accumulo/test/functional/MasterFailoverIT.java
+++ b/test/src/test/java/org/apache/accumulo/test/functional/MasterFailoverIT.java
@@ -56,7 +56,7 @@ public class MasterFailoverIT extends ConfigurableMacIT {
c.tableOperations().rename("test_ingest", "test_ingest2");
try {
VerifyIngest.Opts vopts = new VerifyIngest.Opts();
- vopts.tableName = "test_ingest2";
+ vopts.setTableName("test_ingest2");
VerifyIngest.verifyIngest(c, vopts, SOPTS);
} finally {
p.destroy();
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/test/java/org/apache/accumulo/test/functional/RenameIT.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/accumulo/test/functional/RenameIT.java b/test/src/test/java/org/apache/accumulo/test/functional/RenameIT.java
index 8cbe84f..521ff84 100644
--- a/test/src/test/java/org/apache/accumulo/test/functional/RenameIT.java
+++ b/test/src/test/java/org/apache/accumulo/test/functional/RenameIT.java
@@ -39,17 +39,17 @@ public class RenameIT extends SimpleMacIT {
ScannerOpts scanOpts = new ScannerOpts();
TestIngest.Opts opts = new TestIngest.Opts();
opts.createTable = true;
- opts.tableName = name1;
+ opts.setTableName(name1);
Connector c = getConnector();
TestIngest.ingest(c, opts, bwOpts);
c.tableOperations().rename(name1, name2);
TestIngest.ingest(c, opts, bwOpts);
VerifyIngest.Opts vopts = new VerifyIngest.Opts();
- vopts.tableName = name2;
+ vopts.setTableName(name2);
VerifyIngest.verifyIngest(c, vopts, scanOpts);
c.tableOperations().delete(name1);
c.tableOperations().rename(name2, name1);
- vopts.tableName = name1;
+ vopts.setTableName(name1);
VerifyIngest.verifyIngest(c, vopts, scanOpts);
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/test/java/org/apache/accumulo/test/functional/TableIT.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/accumulo/test/functional/TableIT.java b/test/src/test/java/org/apache/accumulo/test/functional/TableIT.java
index 832ec60..19ea002 100644
--- a/test/src/test/java/org/apache/accumulo/test/functional/TableIT.java
+++ b/test/src/test/java/org/apache/accumulo/test/functional/TableIT.java
@@ -53,11 +53,11 @@ public class TableIT extends SimpleMacIT {
String tableName = getUniqueNames(1)[0];
to.create(tableName);
TestIngest.Opts opts = new TestIngest.Opts();
- opts.tableName = tableName;
+ opts.setTableName(tableName);
TestIngest.ingest(c, opts, new BatchWriterOpts());
to.flush(tableName, null, null, true);
VerifyIngest.Opts vopts = new VerifyIngest.Opts();
- vopts.tableName = tableName;
+ vopts.setTableName(tableName);
VerifyIngest.verifyIngest(c, vopts, new ScannerOpts());
String id = to.tableIdMap().get(tableName);
Scanner s = c.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f74c5c6d/test/src/test/java/org/apache/accumulo/test/functional/WriteLotsIT.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/accumulo/test/functional/WriteLotsIT.java b/test/src/test/java/org/apache/accumulo/test/functional/WriteLotsIT.java
index 214fc2f..9515089 100644
--- a/test/src/test/java/org/apache/accumulo/test/functional/WriteLotsIT.java
+++ b/test/src/test/java/org/apache/accumulo/test/functional/WriteLotsIT.java
@@ -50,7 +50,7 @@ public class WriteLotsIT extends SimpleMacIT {
TestIngest.Opts opts = new TestIngest.Opts();
opts.startRow = index * 10000;
opts.rows = 10000;
- opts.tableName = tableName;
+ opts.setTableName(tableName);
TestIngest.ingest(c, opts, new BatchWriterOpts());
} catch (Exception ex) {
ref.set(ex);
@@ -68,7 +68,7 @@ public class WriteLotsIT extends SimpleMacIT {
}
VerifyIngest.Opts vopts = new VerifyIngest.Opts();
vopts.rows = 10000 * 10;
- vopts.tableName = tableName;
+ vopts.setTableName(tableName);
VerifyIngest.verifyIngest(c, vopts, new ScannerOpts());
}
[09/12] ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
deleted file mode 100644
index 7657c3c..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
+++ /dev/null
@@ -1,796 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce.lib.impl;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.StringTokenizer;
-
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.ClientSideIteratorScanner;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Instance;
-import org.apache.accumulo.core.client.IsolatedScanner;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.RowIterator;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.client.impl.Tables;
-import org.apache.accumulo.core.client.impl.TabletLocator;
-import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
-import org.apache.accumulo.core.client.mock.MockTabletLocator;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.KeyExtent;
-import org.apache.accumulo.core.data.PartialKey;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
-import org.apache.accumulo.core.master.state.tables.TableState;
-import org.apache.accumulo.core.metadata.MetadataTable;
-import org.apache.accumulo.core.metadata.schema.MetadataSchema;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.security.TablePermission;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.accumulo.core.util.TextUtil;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.MapWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.util.StringUtils;
-
-import com.google.common.collect.Maps;
-
-/**
- * @since 1.6.0
- */
-public class InputConfigurator extends ConfiguratorBase {
-
- /**
- * Configuration keys for {@link Scanner}.
- *
- * @since 1.6.0
- */
- public static enum ScanOpts {
- TABLE_NAME, AUTHORIZATIONS, RANGES, COLUMNS, ITERATORS, TABLE_CONFIGS
- }
-
- /**
- * Configuration keys for various features.
- *
- * @since 1.6.0
- */
- public static enum Features {
- AUTO_ADJUST_RANGES, SCAN_ISOLATION, USE_LOCAL_ITERATORS, SCAN_OFFLINE
- }
-
- /**
- * Sets the name of the input table, over which this job will scan.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param tableName
- * the name of the input table
- * @since 1.6.0
- */
- public static void setInputTableName(Class<?> implementingClass, Configuration conf, String tableName) {
- checkArgument(tableName != null, "tableName is null");
- conf.set(enumToConfKey(implementingClass, ScanOpts.TABLE_NAME), tableName);
- }
-
- /**
- * Sets the name of the input table, over which this job will scan.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @since 1.6.0
- */
- public static String getInputTableName(Class<?> implementingClass, Configuration conf) {
- return conf.get(enumToConfKey(implementingClass, ScanOpts.TABLE_NAME));
- }
-
- /**
- * Sets the {@link Authorizations} used to scan. Must be a subset of the user's authorization. Defaults to the empty set.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param auths
- * the user's authorizations
- * @since 1.6.0
- */
- public static void setScanAuthorizations(Class<?> implementingClass, Configuration conf, Authorizations auths) {
- if (auths != null && !auths.isEmpty())
- conf.set(enumToConfKey(implementingClass, ScanOpts.AUTHORIZATIONS), auths.serialize());
- }
-
- /**
- * Gets the authorizations to set for the scans from the configuration.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return the Accumulo scan authorizations
- * @since 1.6.0
- * @see #setScanAuthorizations(Class, Configuration, Authorizations)
- */
- public static Authorizations getScanAuthorizations(Class<?> implementingClass, Configuration conf) {
- String authString = conf.get(enumToConfKey(implementingClass, ScanOpts.AUTHORIZATIONS));
- return authString == null ? Authorizations.EMPTY : new Authorizations(authString.getBytes(StandardCharsets.UTF_8));
- }
-
- /**
- * Sets the input ranges to scan on all input tables for this job. If not set, the entire table will be scanned.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param ranges
- * the ranges that will be mapped over
- * @throws IllegalArgumentException
- * if the ranges cannot be encoded into base 64
- * @since 1.6.0
- */
- public static void setRanges(Class<?> implementingClass, Configuration conf, Collection<Range> ranges) {
- checkArgument(ranges != null, "ranges is null");
-
- ArrayList<String> rangeStrings = new ArrayList<String>(ranges.size());
- try {
- for (Range r : ranges) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- r.write(new DataOutputStream(baos));
- rangeStrings.add(new String(Base64.encodeBase64(baos.toByteArray())));
- }
- conf.setStrings(enumToConfKey(implementingClass, ScanOpts.RANGES), rangeStrings.toArray(new String[0]));
- } catch (IOException ex) {
- throw new IllegalArgumentException("Unable to encode ranges to Base64", ex);
- }
- }
-
- /**
- * Gets the ranges to scan over from a job.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return the ranges
- * @throws IOException
- * if the ranges have been encoded improperly
- * @since 1.6.0
- * @see #setRanges(Class, Configuration, Collection)
- */
- public static List<Range> getRanges(Class<?> implementingClass, Configuration conf) throws IOException {
-
- Collection<String> encodedRanges = conf.getStringCollection(enumToConfKey(implementingClass, ScanOpts.RANGES));
- List<Range> ranges = new ArrayList<Range>();
- for (String rangeString : encodedRanges) {
- ByteArrayInputStream bais = new ByteArrayInputStream(Base64.decodeBase64(rangeString.getBytes()));
- Range range = new Range();
- range.readFields(new DataInputStream(bais));
- ranges.add(range);
- }
- return ranges;
- }
-
- /**
- * Gets a list of the iterator settings (for iterators to apply to a scanner) from this configuration.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return a list of iterators
- * @since 1.6.0
- * @see #addIterator(Class, Configuration, IteratorSetting)
- */
- public static List<IteratorSetting> getIterators(Class<?> implementingClass, Configuration conf) {
- String iterators = conf.get(enumToConfKey(implementingClass, ScanOpts.ITERATORS));
-
- // If no iterators are present, return an empty list
- if (iterators == null || iterators.isEmpty())
- return new ArrayList<IteratorSetting>();
-
- // Compose the set of iterators encoded in the job configuration
- StringTokenizer tokens = new StringTokenizer(iterators, StringUtils.COMMA_STR);
- List<IteratorSetting> list = new ArrayList<IteratorSetting>();
- try {
- while (tokens.hasMoreTokens()) {
- String itstring = tokens.nextToken();
- ByteArrayInputStream bais = new ByteArrayInputStream(Base64.decodeBase64(itstring.getBytes()));
- list.add(new IteratorSetting(new DataInputStream(bais)));
- bais.close();
- }
- } catch (IOException e) {
- throw new IllegalArgumentException("couldn't decode iterator settings");
- }
- return list;
- }
-
- /**
- * Restricts the columns that will be mapped over for the single input table on this job.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param columnFamilyColumnQualifierPairs
- * a pair of {@link Text} objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is
- * selected. An empty set is the default and is equivalent to scanning all columns.
- * @throws IllegalArgumentException
- * if the column family is null
- * @since 1.6.0
- */
- public static void fetchColumns(Class<?> implementingClass, Configuration conf, Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
- checkArgument(columnFamilyColumnQualifierPairs != null, "columnFamilyColumnQualifierPairs is null");
- String[] columnStrings = serializeColumns(columnFamilyColumnQualifierPairs);
- conf.setStrings(enumToConfKey(implementingClass, ScanOpts.COLUMNS), columnStrings);
- }
-
- public static String[] serializeColumns(Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
- checkArgument(columnFamilyColumnQualifierPairs != null, "columnFamilyColumnQualifierPairs is null");
- ArrayList<String> columnStrings = new ArrayList<String>(columnFamilyColumnQualifierPairs.size());
- for (Pair<Text,Text> column : columnFamilyColumnQualifierPairs) {
-
- if (column.getFirst() == null)
- throw new IllegalArgumentException("Column family can not be null");
-
- String col = new String(Base64.encodeBase64(TextUtil.getBytes(column.getFirst())), StandardCharsets.UTF_8);
- if (column.getSecond() != null)
- col += ":" + new String(Base64.encodeBase64(TextUtil.getBytes(column.getSecond())), StandardCharsets.UTF_8);
- columnStrings.add(col);
- }
-
- return columnStrings.toArray(new String[0]);
- }
-
- /**
- * Gets the columns to be mapped over from this job.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return a set of columns
- * @since 1.6.0
- * @see #fetchColumns(Class, Configuration, Collection)
- */
- public static Set<Pair<Text,Text>> getFetchedColumns(Class<?> implementingClass, Configuration conf) {
- checkArgument(conf != null, "conf is null");
- String confValue = conf.get(enumToConfKey(implementingClass, ScanOpts.COLUMNS));
- List<String> serialized = new ArrayList<String>();
- if (confValue != null) {
- // Split and include any trailing empty strings to allow empty column families
- for (String val : confValue.split(",", -1)) {
- serialized.add(val);
- }
- }
- return deserializeFetchedColumns(serialized);
- }
-
- public static Set<Pair<Text,Text>> deserializeFetchedColumns(Collection<String> serialized) {
- Set<Pair<Text,Text>> columns = new HashSet<Pair<Text,Text>>();
-
- if (null == serialized) {
- return columns;
- }
-
- for (String col : serialized) {
- int idx = col.indexOf(":");
- Text cf = new Text(idx < 0 ? Base64.decodeBase64(col.getBytes(StandardCharsets.UTF_8)) : Base64.decodeBase64(col.substring(0, idx).getBytes(
- StandardCharsets.UTF_8)));
- Text cq = idx < 0 ? null : new Text(Base64.decodeBase64(col.substring(idx + 1).getBytes(StandardCharsets.UTF_8)));
- columns.add(new Pair<Text,Text>(cf, cq));
- }
- return columns;
- }
-
- /**
- * Encode an iterator on the input for the single input table associated with this job.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param cfg
- * the configuration of the iterator
- * @throws IllegalArgumentException
- * if the iterator can't be serialized into the configuration
- * @since 1.6.0
- */
- public static void addIterator(Class<?> implementingClass, Configuration conf, IteratorSetting cfg) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- String newIter;
- try {
- cfg.write(new DataOutputStream(baos));
- newIter = new String(Base64.encodeBase64(baos.toByteArray()), StandardCharsets.UTF_8);
- baos.close();
- } catch (IOException e) {
- throw new IllegalArgumentException("unable to serialize IteratorSetting");
- }
-
- String confKey = enumToConfKey(implementingClass, ScanOpts.ITERATORS);
- String iterators = conf.get(confKey);
- // No iterators specified yet, create a new string
- if (iterators == null || iterators.isEmpty()) {
- iterators = newIter;
- } else {
- // append the next iterator & reset
- iterators = iterators.concat(StringUtils.COMMA_STR + newIter);
- }
- // Store the iterators w/ the job
- conf.set(confKey, iterators);
- }
-
- /**
- * Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
- * Disabling this feature will cause exactly one Map task to be created for each specified range.
- *
- * <p>
- * By default, this feature is <b>enabled</b>.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @see #setRanges(Class, Configuration, Collection)
- * @since 1.6.0
- */
- public static void setAutoAdjustRanges(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
- conf.setBoolean(enumToConfKey(implementingClass, Features.AUTO_ADJUST_RANGES), enableFeature);
- }
-
- /**
- * Determines whether a configuration has auto-adjust ranges enabled.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return false if the feature is disabled, true otherwise
- * @since 1.6.0
- * @see #setAutoAdjustRanges(Class, Configuration, boolean)
- */
- public static Boolean getAutoAdjustRanges(Class<?> implementingClass, Configuration conf) {
- return conf.getBoolean(enumToConfKey(implementingClass, Features.AUTO_ADJUST_RANGES), true);
- }
-
- /**
- * Controls the use of the {@link IsolatedScanner} in this job.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.6.0
- */
- public static void setScanIsolation(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
- conf.setBoolean(enumToConfKey(implementingClass, Features.SCAN_ISOLATION), enableFeature);
- }
-
- /**
- * Determines whether a configuration has isolation enabled.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return true if the feature is enabled, false otherwise
- * @since 1.6.0
- * @see #setScanIsolation(Class, Configuration, boolean)
- */
- public static Boolean isIsolated(Class<?> implementingClass, Configuration conf) {
- return conf.getBoolean(enumToConfKey(implementingClass, Features.SCAN_ISOLATION), false);
- }
-
- /**
- * Controls the use of the {@link ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack to be constructed within the Map
- * task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be available on the classpath for the task.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.6.0
- */
- public static void setLocalIterators(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
- conf.setBoolean(enumToConfKey(implementingClass, Features.USE_LOCAL_ITERATORS), enableFeature);
- }
-
- /**
- * Determines whether a configuration uses local iterators.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return true if the feature is enabled, false otherwise
- * @since 1.6.0
- * @see #setLocalIterators(Class, Configuration, boolean)
- */
- public static Boolean usesLocalIterators(Class<?> implementingClass, Configuration conf) {
- return conf.getBoolean(enumToConfKey(implementingClass, Features.USE_LOCAL_ITERATORS), false);
- }
-
- /**
- * <p>
- * Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
- * table's files. If the table is not offline, then the job will fail. If the table comes online during the map reduce job, it is likely that the job will
- * fail.
- *
- * <p>
- * To use this option, the map reduce user will need access to read the Accumulo directory in HDFS.
- *
- * <p>
- * Reading the offline table will create the scan time iterator stack in the map process. So any iterators that are configured for the table will need to be
- * on the mapper's classpath.
- *
- * <p>
- * One way to use this feature is to clone a table, take the clone offline, and use the clone as the input table for a map reduce job. If you plan to map
- * reduce over the data many times, it may be better to compact the table, clone it, take it offline, and use the clone for all map reduce jobs. The
- * reason to do this is that compaction will reduce each tablet in the table to one file, and it is faster to read from one file.
- *
- * <p>
- * There are two possible advantages to reading a table's files directly out of HDFS. First, you may see better read performance. Second, it will support
- * speculative execution better. When reading an online table, speculative execution can put more load on an already slow tablet server.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.6.0
- */
- public static void setOfflineTableScan(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
- conf.setBoolean(enumToConfKey(implementingClass, Features.SCAN_OFFLINE), enableFeature);
- }
-
- /**
- * Determines whether a configuration has the offline table scan feature enabled.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return true if the feature is enabled, false otherwise
- * @since 1.6.0
- * @see #setOfflineTableScan(Class, Configuration, boolean)
- */
- public static Boolean isOfflineScan(Class<?> implementingClass, Configuration conf) {
- return conf.getBoolean(enumToConfKey(implementingClass, Features.SCAN_OFFLINE), false);
- }
-
- /**
- * Sets configurations for multiple tables at a time.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param configs
- * an array of {@link InputTableConfig} objects to associate with the job
- * @since 1.6.0
- */
- public static void setInputTableConfigs(Class<?> implementingClass, Configuration conf, Map<String,InputTableConfig> configs) {
- MapWritable mapWritable = new MapWritable();
- for (Map.Entry<String,InputTableConfig> tableConfig : configs.entrySet())
- mapWritable.put(new Text(tableConfig.getKey()), tableConfig.getValue());
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- try {
- mapWritable.write(new DataOutputStream(baos));
- } catch (IOException e) {
- throw new IllegalStateException("Table configuration could not be serialized.");
- }
-
- String confKey = enumToConfKey(implementingClass, ScanOpts.TABLE_CONFIGS);
- conf.set(confKey, new String(Base64.encodeBase64(baos.toByteArray())));
- }
-
- /**
- * Returns all {@link InputTableConfig} objects associated with this job.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return all of the table query configs for the job
- * @since 1.6.0
- */
- public static Map<String,InputTableConfig> getInputTableConfigs(Class<?> implementingClass, Configuration conf) {
- Map<String,InputTableConfig> configs = new HashMap<String,InputTableConfig>();
- Map.Entry<String,InputTableConfig> defaultConfig = getDefaultInputTableConfig(implementingClass, conf);
- if (defaultConfig != null)
- configs.put(defaultConfig.getKey(), defaultConfig.getValue());
- String configString = conf.get(enumToConfKey(implementingClass, ScanOpts.TABLE_CONFIGS));
- MapWritable mapWritable = new MapWritable();
- if (configString != null) {
- try {
- byte[] bytes = Base64.decodeBase64(configString.getBytes());
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- mapWritable.readFields(new DataInputStream(bais));
- bais.close();
- } catch (IOException e) {
- throw new IllegalStateException("The table query configurations could not be deserialized from the given configuration");
- }
- }
- for (Map.Entry<Writable,Writable> entry : mapWritable.entrySet())
- configs.put(((Text) entry.getKey()).toString(), (InputTableConfig) entry.getValue());
-
- return configs;
- }
-
- /**
- * Returns the {@link InputTableConfig} for the given table
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param tableName
- * the table name for which to fetch the table query config
- * @return the table query config for the given table name (if it exists) and null if it does not
- * @since 1.6.0
- */
- public static InputTableConfig getInputTableConfig(Class<?> implementingClass, Configuration conf, String tableName) {
- Map<String,InputTableConfig> queryConfigs = getInputTableConfigs(implementingClass, conf);
- return queryConfigs.get(tableName);
- }
-
- /**
- * Initializes an Accumulo {@link TabletLocator} based on the configuration.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param tableId
- * The table id for which to initialize the {@link TabletLocator}
- * @return an Accumulo tablet locator
- * @throws TableNotFoundException
- * if the table name set on the configuration doesn't exist
- * @since 1.6.0
- */
- public static TabletLocator getTabletLocator(Class<?> implementingClass, Configuration conf, String tableId) throws TableNotFoundException {
- String instanceType = conf.get(enumToConfKey(implementingClass, InstanceOpts.TYPE));
- if ("MockInstance".equals(instanceType))
- return new MockTabletLocator();
- Instance instance = getInstance(implementingClass, conf);
- return TabletLocator.getLocator(instance, new Text(tableId));
- }
-
- // InputFormat doesn't have the equivalent of OutputFormat's checkOutputSpecs(JobContext job)
- /**
- * Check whether a configuration is fully configured to be used with an Accumulo {@link org.apache.hadoop.mapreduce.InputFormat}.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @throws IOException
- * if the context is improperly configured
- * @since 1.6.0
- */
- public static void validateOptions(Class<?> implementingClass, Configuration conf) throws IOException {
-
- Map<String,InputTableConfig> inputTableConfigs = getInputTableConfigs(implementingClass, conf);
- if (!isConnectorInfoSet(implementingClass, conf))
- throw new IOException("Input info has not been set.");
- String instanceKey = conf.get(enumToConfKey(implementingClass, InstanceOpts.TYPE));
- if (!"MockInstance".equals(instanceKey) && !"ZooKeeperInstance".equals(instanceKey))
- throw new IOException("Instance info has not been set.");
- // validate that we can connect as configured
- try {
- String principal = getPrincipal(implementingClass, conf);
- AuthenticationToken token = getAuthenticationToken(implementingClass, conf);
- Connector c = getInstance(implementingClass, conf).getConnector(principal, token);
- if (!c.securityOperations().authenticateUser(principal, token))
- throw new IOException("Unable to authenticate user");
-
- if (getInputTableConfigs(implementingClass, conf).size() == 0)
- throw new IOException("No table set.");
-
- for (Map.Entry<String,InputTableConfig> tableConfig : inputTableConfigs.entrySet()) {
- if (!c.securityOperations().hasTablePermission(getPrincipal(implementingClass, conf), tableConfig.getKey(), TablePermission.READ))
- throw new IOException("Unable to access table");
- }
- for (Map.Entry<String,InputTableConfig> tableConfigEntry : inputTableConfigs.entrySet()) {
- InputTableConfig tableConfig = tableConfigEntry.getValue();
- if (!tableConfig.shouldUseLocalIterators()) {
- if (tableConfig.getIterators() != null) {
- for (IteratorSetting iter : tableConfig.getIterators()) {
- if (!c.tableOperations().testClassLoad(tableConfigEntry.getKey(), iter.getIteratorClass(), SortedKeyValueIterator.class.getName()))
- throw new AccumuloException("Servers are unable to load " + iter.getIteratorClass() + " as a " + SortedKeyValueIterator.class.getName());
- }
- }
- }
- }
- } catch (AccumuloException e) {
- throw new IOException(e);
- } catch (AccumuloSecurityException e) {
- throw new IOException(e);
- } catch (TableNotFoundException e) {
- throw new IOException(e);
- }
- }
-
- /**
- * Returns the {@link org.apache.accumulo.core.client.mapreduce.InputTableConfig} for the configuration based on the properties set using the single-table
- * input methods.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop instance for which to retrieve the configuration
- * @return the config object built from the single input table properties set on the job
- * @since 1.6.0
- */
- protected static Map.Entry<String,InputTableConfig> getDefaultInputTableConfig(Class<?> implementingClass, Configuration conf) {
- String tableName = getInputTableName(implementingClass, conf);
- if (tableName != null) {
- InputTableConfig queryConfig = new InputTableConfig();
- List<IteratorSetting> itrs = getIterators(implementingClass, conf);
- if (itrs != null)
- queryConfig.setIterators(itrs);
- Set<Pair<Text,Text>> columns = getFetchedColumns(implementingClass, conf);
- if (columns != null)
- queryConfig.fetchColumns(columns);
- List<Range> ranges = null;
- try {
- ranges = getRanges(implementingClass, conf);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- if (ranges != null)
- queryConfig.setRanges(ranges);
-
- queryConfig.setAutoAdjustRanges(getAutoAdjustRanges(implementingClass, conf)).setUseIsolatedScanners(isIsolated(implementingClass, conf))
- .setUseLocalIterators(usesLocalIterators(implementingClass, conf)).setOfflineScan(isOfflineScan(implementingClass, conf));
- return Maps.immutableEntry(tableName, queryConfig);
- }
- return null;
- }
-
- public static Map<String,Map<KeyExtent,List<Range>>> binOffline(String tableId, List<Range> ranges, Instance instance, Connector conn)
- throws AccumuloException, TableNotFoundException {
- Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<String,Map<KeyExtent,List<Range>>>();
-
- if (Tables.getTableState(instance, tableId) != TableState.OFFLINE) {
- Tables.clearCache(instance);
- if (Tables.getTableState(instance, tableId) != TableState.OFFLINE) {
- throw new AccumuloException("Table is online tableId:" + tableId + " cannot scan table in offline mode ");
- }
- }
-
- for (Range range : ranges) {
- Text startRow;
-
- if (range.getStartKey() != null)
- startRow = range.getStartKey().getRow();
- else
- startRow = new Text();
-
- Range metadataRange = new Range(new KeyExtent(new Text(tableId), startRow, null).getMetadataEntry(), true, null, false);
- Scanner scanner = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
- MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.fetch(scanner);
- scanner.fetchColumnFamily(MetadataSchema.TabletsSection.LastLocationColumnFamily.NAME);
- scanner.fetchColumnFamily(MetadataSchema.TabletsSection.CurrentLocationColumnFamily.NAME);
- scanner.fetchColumnFamily(MetadataSchema.TabletsSection.FutureLocationColumnFamily.NAME);
- scanner.setRange(metadataRange);
-
- RowIterator rowIter = new RowIterator(scanner);
- KeyExtent lastExtent = null;
- while (rowIter.hasNext()) {
- Iterator<Map.Entry<Key,Value>> row = rowIter.next();
- String last = "";
- KeyExtent extent = null;
- String location = null;
-
- while (row.hasNext()) {
- Map.Entry<Key,Value> entry = row.next();
- Key key = entry.getKey();
-
- if (key.getColumnFamily().equals(MetadataSchema.TabletsSection.LastLocationColumnFamily.NAME)) {
- last = entry.getValue().toString();
- }
-
- if (key.getColumnFamily().equals(MetadataSchema.TabletsSection.CurrentLocationColumnFamily.NAME)
- || key.getColumnFamily().equals(MetadataSchema.TabletsSection.FutureLocationColumnFamily.NAME)) {
- location = entry.getValue().toString();
- }
-
- if (MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.hasColumns(key)) {
- extent = new KeyExtent(key.getRow(), entry.getValue());
- }
-
- }
-
- if (location != null)
- return null;
-
- if (!extent.getTableId().toString().equals(tableId)) {
- throw new AccumuloException("Saw unexpected table Id " + tableId + " " + extent);
- }
-
- if (lastExtent != null && !extent.isPreviousExtent(lastExtent)) {
- throw new AccumuloException(" " + lastExtent + " is not previous extent " + extent);
- }
-
- Map<KeyExtent,List<Range>> tabletRanges = binnedRanges.get(last);
- if (tabletRanges == null) {
- tabletRanges = new HashMap<KeyExtent,List<Range>>();
- binnedRanges.put(last, tabletRanges);
- }
-
- List<Range> rangeList = tabletRanges.get(extent);
- if (rangeList == null) {
- rangeList = new ArrayList<Range>();
- tabletRanges.put(extent, rangeList);
- }
-
- rangeList.add(range);
-
- if (extent.getEndRow() == null || range.afterEndKey(new Key(extent.getEndRow()).followingKey(PartialKey.ROW))) {
- break;
- }
-
- lastExtent = extent;
- }
-
- }
- return binnedRanges;
- }
-}
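For context, the following is a minimal sketch (not part of this commit) of the job-driver calls that the InputConfigurator helpers above back, using the public 1.6 AccumuloInputFormat API; the instance, ZooKeeper host, user, password, table, and authorization strings are placeholders.

import java.util.Collections;

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class InputFormatSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // Each call below is stored in the Hadoop Configuration by InputConfigurator.
    AccumuloInputFormat.setZooKeeperInstance(job, "myInstance", "zkhost:2181");
    AccumuloInputFormat.setConnectorInfo(job, "user", new PasswordToken("secret"));
    AccumuloInputFormat.setInputTableName(job, "mytable");
    AccumuloInputFormat.setScanAuthorizations(job, new Authorizations("public"));
    AccumuloInputFormat.setRanges(job, Collections.singleton(new Range("a", "z")));
    AccumuloInputFormat.addIterator(job, new IteratorSetting(50, "regex", RegExFilter.class));
    AccumuloInputFormat.setOfflineTableScan(job, false);
    job.setInputFormatClass(AccumuloInputFormat.class);
  }
}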
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/OutputConfigurator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/OutputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/OutputConfigurator.java
deleted file mode 100644
index 727971a..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/OutputConfigurator.java
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce.lib.impl;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * @since 1.6.0
- */
-public class OutputConfigurator extends ConfiguratorBase {
-
- /**
- * Configuration keys for {@link BatchWriter}.
- *
- * @since 1.6.0
- */
- public static enum WriteOpts {
- DEFAULT_TABLE_NAME, BATCH_WRITER_CONFIG
- }
-
- /**
- * Configuration keys for various features.
- *
- * @since 1.6.0
- */
- public static enum Features {
- CAN_CREATE_TABLES, SIMULATION_MODE
- }
-
- /**
- * Sets the default table name to use if one emits a null in place of a table name for a given mutation. Table names can only be alpha-numeric and
- * underscores.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param tableName
- * the table to use when the tablename is null in the write call
- * @since 1.6.0
- */
- public static void setDefaultTableName(Class<?> implementingClass, Configuration conf, String tableName) {
- if (tableName != null)
- conf.set(enumToConfKey(implementingClass, WriteOpts.DEFAULT_TABLE_NAME), tableName);
- }
-
- /**
- * Gets the default table name from the configuration.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return the default table name
- * @since 1.6.0
- * @see #setDefaultTableName(Class, Configuration, String)
- */
- public static String getDefaultTableName(Class<?> implementingClass, Configuration conf) {
- return conf.get(enumToConfKey(implementingClass, WriteOpts.DEFAULT_TABLE_NAME));
- }
-
- /**
- * Sets the configuration for the job's {@link BatchWriter} instances. If not set, a new {@link BatchWriterConfig} with sensible built-in defaults is
- * used. Setting the configuration multiple times overwrites any previous configuration.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param bwConfig
- * the configuration for the {@link BatchWriter}
- * @since 1.6.0
- */
- public static void setBatchWriterOptions(Class<?> implementingClass, Configuration conf, BatchWriterConfig bwConfig) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- String serialized;
- try {
- bwConfig.write(new DataOutputStream(baos));
- serialized = new String(baos.toByteArray(), StandardCharsets.UTF_8);
- baos.close();
- } catch (IOException e) {
- throw new IllegalArgumentException("unable to serialize " + BatchWriterConfig.class.getName());
- }
- conf.set(enumToConfKey(implementingClass, WriteOpts.BATCH_WRITER_CONFIG), serialized);
- }
-
- /**
- * Gets the {@link BatchWriterConfig} settings.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return the configuration object
- * @since 1.6.0
- * @see #setBatchWriterOptions(Class, Configuration, BatchWriterConfig)
- */
- public static BatchWriterConfig getBatchWriterOptions(Class<?> implementingClass, Configuration conf) {
- String serialized = conf.get(enumToConfKey(implementingClass, WriteOpts.BATCH_WRITER_CONFIG));
- BatchWriterConfig bwConfig = new BatchWriterConfig();
- if (serialized == null || serialized.isEmpty()) {
- return bwConfig;
- } else {
- try {
- ByteArrayInputStream bais = new ByteArrayInputStream(serialized.getBytes(StandardCharsets.UTF_8));
- bwConfig.readFields(new DataInputStream(bais));
- bais.close();
- return bwConfig;
- } catch (IOException e) {
- throw new IllegalArgumentException("unable to serialize " + BatchWriterConfig.class.getName());
- }
- }
- }
-
- /**
- * Sets the directive to create new tables, as necessary. Table names can only be alpha-numeric and underscores.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.6.0
- */
- public static void setCreateTables(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
- conf.setBoolean(enumToConfKey(implementingClass, Features.CAN_CREATE_TABLES), enableFeature);
- }
-
- /**
- * Determines whether tables are permitted to be created as needed.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return true if the feature is enabled, false otherwise
- * @since 1.6.0
- * @see #setCreateTables(Class, Configuration, boolean)
- */
- public static Boolean canCreateTables(Class<?> implementingClass, Configuration conf) {
- return conf.getBoolean(enumToConfKey(implementingClass, Features.CAN_CREATE_TABLES), false);
- }
-
- /**
- * Sets the directive to use simulation mode for this job. In simulation mode, no output is produced. This is useful for testing.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.6.0
- */
- public static void setSimulationMode(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
- conf.setBoolean(enumToConfKey(implementingClass, Features.SIMULATION_MODE), enableFeature);
- }
-
- /**
- * Determines whether simulation mode is enabled for this job.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return true if the feature is enabled, false otherwise
- * @since 1.6.0
- * @see #setSimulationMode(Class, Configuration, boolean)
- */
- public static Boolean getSimulationMode(Class<?> implementingClass, Configuration conf) {
- return conf.getBoolean(enumToConfKey(implementingClass, Features.SIMULATION_MODE), false);
- }
-
-}
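For reference, a rough sketch of how a job would typically exercise the write-side options serialized above, going through the public AccumuloOutputFormat configurators rather than these internal helpers. The method names follow my reading of the 1.6 public API, and the user, password, and table name are placeholders.

import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.hadoop.mapreduce.Job;

public class OutputOptionsSketch {
  public static void configure(Job job) throws Exception {
    // The BatchWriterConfig is serialized into the job configuration and
    // rebuilt on the task side, as in the get/setBatchWriterOptions pair above.
    BatchWriterConfig bwConfig = new BatchWriterConfig();
    bwConfig.setMaxMemory(50 * 1024 * 1024);      // buffer up to 50 MB of mutations
    bwConfig.setMaxLatency(2, TimeUnit.MINUTES);  // flush at least every two minutes

    AccumuloOutputFormat.setConnectorInfo(job, "someuser", new PasswordToken("somepass"));
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
    AccumuloOutputFormat.setCreateTables(job, true);    // allow missing tables to be created
    AccumuloOutputFormat.setSimulationMode(job, false); // actually write output
    AccumuloOutputFormat.setDefaultTableName(job, "example_table");
  }
}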
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/package-info.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/package-info.java
deleted file mode 100644
index 243160d..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/package-info.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package exists to store common helpers for configuring MapReduce jobs in a single location. It contains static configurator methods, stored in classes
- * separate from the things they configure (typically, {@link org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat}/
- * {@link org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat} and related classes in compatible frameworks), rather than storing them in those
- * InputFormats/OutputFormats, so as not to clutter their API with methods that don't match the conventions for that framework. These classes may be useful to
- * input/output plugins for other frameworks, so they can reuse the same configuration options and/or serialize them into a
- * {@link org.apache.hadoop.conf.Configuration} instance in a standard way.
- *
- * <p>
- * It is not expected these will change much (except when new features are added), but end users should not use these classes. They should use the static
- * configurators on the {@link org.apache.hadoop.mapreduce.InputFormat} or {@link org.apache.hadoop.mapreduce.OutputFormat} they are configuring, which in turn
- * may use these classes to implement their own static configurators. Once again, these classes are intended for internal use, but may be useful to developers
- * of plugins for other frameworks that read/write to Accumulo.
- *
- * @since 1.6.0
- */
-package org.apache.accumulo.core.client.mapreduce.lib.impl;
-
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/KeyRangePartitioner.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/KeyRangePartitioner.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/KeyRangePartitioner.java
deleted file mode 100644
index c59841d..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/KeyRangePartitioner.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce.lib.partition;
-
-import org.apache.accumulo.core.data.Key;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-/**
- * Hadoop partitioner that uses ranges based on row keys, and optionally sub-bins based on hashing.
- */
-public class KeyRangePartitioner extends Partitioner<Key,Writable> implements Configurable {
- private RangePartitioner rp = new RangePartitioner();
-
- @Override
- public int getPartition(Key key, Writable value, int numPartitions) {
- return rp.getPartition(key.getRow(), value, numPartitions);
- }
-
- @Override
- public Configuration getConf() {
- return rp.getConf();
- }
-
- @Override
- public void setConf(Configuration conf) {
- rp.setConf(conf);
- }
-
- /**
- * Sets the HDFS file name to use, containing a newline-separated list of Base64-encoded split points that represent ranges for partitioning.
- */
- public static void setSplitFile(Job job, String file) {
- RangePartitioner.setSplitFile(job, file);
- }
-
- /**
- * Sets the number of random sub-bins per range
- */
- public static void setNumSubBins(Job job, int num) {
- RangePartitioner.setNumSubBins(job, num);
- }
-}
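A minimal sketch of wiring KeyRangePartitioner into a bulk-ingest style job; the split file path is a placeholder and Job.getInstance assumes the Hadoop 2 style API.

import org.apache.accumulo.core.client.mapreduce.lib.partition.KeyRangePartitioner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class KeyRangePartitionerSketch {
  public static Job setup(Configuration conf) throws Exception {
    Job job = Job.getInstance(conf, "bulk-ingest-sketch");
    // Route each map-output Key to the reducer owning the range its row falls into.
    job.setPartitionerClass(KeyRangePartitioner.class);
    // Newline-separated, Base64-encoded split points readable from HDFS.
    KeyRangePartitioner.setSplitFile(job, "/tmp/example-splits.txt");
    // Optionally hash each range into sub-bins to spread load across reducers.
    KeyRangePartitioner.setNumSubBins(job, 4);
    return job;
  }
}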
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitioner.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitioner.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitioner.java
deleted file mode 100644
index 1b7501c..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitioner.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce.lib.partition;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.URI;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import java.util.Scanner;
-import java.util.TreeSet;
-
-import org.apache.accumulo.core.client.mapreduce.lib.impl.DistributedCacheHelper;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-/**
- * Hadoop partitioner that uses ranges, and optionally sub-bins based on hashing.
- */
-public class RangePartitioner extends Partitioner<Text,Writable> implements Configurable {
- private static final String PREFIX = RangePartitioner.class.getName();
- private static final String CUTFILE_KEY = PREFIX + ".cutFile";
- private static final String NUM_SUBBINS = PREFIX + ".subBins";
-
- private Configuration conf;
-
- @Override
- public int getPartition(Text key, Writable value, int numPartitions) {
- try {
- return findPartition(key, getCutPoints(), getNumSubBins());
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- int findPartition(Text key, Text[] array, int numSubBins) {
- // find the bin for the range, and guarantee it is positive
- int index = Arrays.binarySearch(array, key);
- index = index < 0 ? (index + 1) * -1 : index;
-
- // both branches are correct when numSubBins == 1, but this check avoids
- // the hashing step when it is not needed, for speed
- if (numSubBins < 2)
- return index;
- return (key.toString().hashCode() & Integer.MAX_VALUE) % numSubBins + index * numSubBins;
- }
-
- private int _numSubBins = 0;
-
- private synchronized int getNumSubBins() {
- if (_numSubBins < 1) {
- // get number of sub-bins and guarantee it is positive
- _numSubBins = Math.max(1, getConf().getInt(NUM_SUBBINS, 1));
- }
- return _numSubBins;
- }
-
- private Text cutPointArray[] = null;
-
- private synchronized Text[] getCutPoints() throws IOException {
- if (cutPointArray == null) {
- String cutFileName = conf.get(CUTFILE_KEY);
- Path[] cf = DistributedCacheHelper.getLocalCacheFiles(conf);
-
- if (cf != null) {
- for (Path path : cf) {
- if (path.toUri().getPath().endsWith(cutFileName.substring(cutFileName.lastIndexOf('/')))) {
- TreeSet<Text> cutPoints = new TreeSet<Text>();
- Scanner in = new Scanner(new BufferedReader(new InputStreamReader(new FileInputStream(path.toString()), StandardCharsets.UTF_8)));
- try {
- while (in.hasNextLine())
- cutPoints.add(new Text(Base64.decodeBase64(in.nextLine().getBytes(StandardCharsets.UTF_8))));
- } finally {
- in.close();
- }
- cutPointArray = cutPoints.toArray(new Text[cutPoints.size()]);
- break;
- }
- }
- }
- if (cutPointArray == null)
- throw new FileNotFoundException(cutFileName + " not found in distributed cache");
- }
- return cutPointArray;
- }
-
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- @Override
- public void setConf(Configuration conf) {
- this.conf = conf;
- }
-
- /**
- * Sets the HDFS file name to use, containing a newline-separated list of Base64-encoded split points that represent ranges for partitioning.
- */
- public static void setSplitFile(Job job, String file) {
- URI uri = new Path(file).toUri();
- DistributedCacheHelper.addCacheFile(uri, job.getConfiguration());
- job.getConfiguration().set(CUTFILE_KEY, uri.getPath());
- }
-
- /**
- * Sets the number of random sub-bins per range
- */
- public static void setNumSubBins(Job job, int num) {
- job.getConfiguration().setInt(NUM_SUBBINS, num);
- }
-}
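The split file consumed by getCutPoints() is just a newline-separated list of Base64-encoded split points. A small sketch of writing one to HDFS (the path and split values are placeholders):

import java.io.PrintStream;
import java.nio.charset.StandardCharsets;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteSplitFileSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    PrintStream out = new PrintStream(fs.create(new Path("/tmp/example-splits.txt")));
    try {
      for (String split : new String[] {"row_0333", "row_0666"}) {
        // One Base64-encoded split point per line, matching what getCutPoints() decodes.
        out.println(new String(Base64.encodeBase64(split.getBytes(StandardCharsets.UTF_8)), StandardCharsets.UTF_8));
      }
    } finally {
      out.close();
    }
  }
}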
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java
deleted file mode 100644
index aad544b..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.io.FileFilter;
-import java.io.IOException;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
-import org.apache.accumulo.core.conf.Property;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.IdentityMapper;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.log4j.Logger;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class AccumuloFileOutputFormatTest {
- private static final String PREFIX = AccumuloFileOutputFormatTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
- private static final String BAD_TABLE = PREFIX + "_mapred_bad_table";
- private static final String TEST_TABLE = PREFIX + "_mapred_test_table";
- private static final String EMPTY_TABLE = PREFIX + "_mapred_empty_table";
-
- private static AssertionError e1 = null;
- private static AssertionError e2 = null;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
-
- @BeforeClass
- public static void setup() throws Exception {
- MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
- Connector c = mockInstance.getConnector("root", new PasswordToken(""));
- c.tableOperations().create(EMPTY_TABLE);
- c.tableOperations().create(TEST_TABLE);
- c.tableOperations().create(BAD_TABLE);
- BatchWriter bw = c.createBatchWriter(TEST_TABLE, new BatchWriterConfig());
- Mutation m = new Mutation("Key");
- m.put("", "", "");
- bw.addMutation(m);
- bw.close();
- bw = c.createBatchWriter(BAD_TABLE, new BatchWriterConfig());
- m = new Mutation("r1");
- m.put("cf1", "cq1", "A&B");
- m.put("cf1", "cq1", "A&B");
- m.put("cf1", "cq2", "A&");
- bw.addMutation(m);
- bw.close();
- }
-
- @Test
- public void testEmptyWrite() throws Exception {
- handleWriteTests(false);
- }
-
- @Test
- public void testRealWrite() throws Exception {
- handleWriteTests(true);
- }
-
- private static class MRTester extends Configured implements Tool {
- private static class BadKeyMapper implements Mapper<Key,Value,Key,Value> {
-
- int index = 0;
-
- @Override
- public void map(Key key, Value value, OutputCollector<Key,Value> output, Reporter reporter) throws IOException {
- try {
- try {
- output.collect(key, value);
- if (index == 2)
- fail();
- } catch (Exception e) {
- Logger.getLogger(this.getClass()).error(e, e);
- assertEquals(2, index);
- }
- } catch (AssertionError e) {
- e1 = e;
- }
- index++;
- }
-
- @Override
- public void configure(JobConf job) {}
-
- @Override
- public void close() throws IOException {
- try {
- assertEquals(2, index);
- } catch (AssertionError e) {
- e2 = e;
- }
- }
-
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 4) {
- throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table> <outputfile>");
- }
-
- String user = args[0];
- String pass = args[1];
- String table = args[2];
-
- JobConf job = new JobConf(getConf());
- job.setJarByClass(this.getClass());
-
- job.setInputFormat(AccumuloInputFormat.class);
-
- AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloInputFormat.setInputTableName(job, table);
- AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
- AccumuloFileOutputFormat.setOutputPath(job, new Path(args[3]));
-
- job.setMapperClass(BAD_TABLE.equals(table) ? BadKeyMapper.class : IdentityMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormat(AccumuloFileOutputFormat.class);
-
- job.setNumReduceTasks(0);
-
- return JobClient.runJob(job).isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
- }
- }
-
- public void handleWriteTests(boolean content) throws Exception {
- File f = folder.newFile("handleWriteTests");
- f.delete();
- MRTester.main(new String[] {"root", "", content ? TEST_TABLE : EMPTY_TABLE, f.getAbsolutePath()});
-
- assertTrue(f.exists());
- File[] files = f.listFiles(new FileFilter() {
- @Override
- public boolean accept(File file) {
- return file.getName().startsWith("part-m-");
- }
- });
- if (content) {
- assertEquals(1, files.length);
- assertTrue(files[0].exists());
- } else {
- assertEquals(0, files.length);
- }
- }
-
- @Test
- public void writeBadVisibility() throws Exception {
- File f = folder.newFile("writeBadVisibility");
- f.delete();
- MRTester.main(new String[] {"root", "", BAD_TABLE, f.getAbsolutePath()});
- Logger.getLogger(this.getClass()).error(e1, e1);
- assertNull(e1);
- assertNull(e2);
- }
-
- @Test
- public void validateConfiguration() throws IOException, InterruptedException {
-
- int a = 7;
- long b = 300l;
- long c = 50l;
- long d = 10l;
- String e = "snappy";
-
- JobConf job = new JobConf();
- AccumuloFileOutputFormat.setReplication(job, a);
- AccumuloFileOutputFormat.setFileBlockSize(job, b);
- AccumuloFileOutputFormat.setDataBlockSize(job, c);
- AccumuloFileOutputFormat.setIndexBlockSize(job, d);
- AccumuloFileOutputFormat.setCompressionType(job, e);
-
- AccumuloConfiguration acuconf = AccumuloFileOutputFormat.getAccumuloConfiguration(job);
-
- assertEquals(7, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
- assertEquals(300l, acuconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE));
- assertEquals(50l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
- assertEquals(10l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
- assertEquals("snappy", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
-
- a = 17;
- b = 1300l;
- c = 150l;
- d = 110l;
- e = "lzo";
-
- job = new JobConf();
- AccumuloFileOutputFormat.setReplication(job, a);
- AccumuloFileOutputFormat.setFileBlockSize(job, b);
- AccumuloFileOutputFormat.setDataBlockSize(job, c);
- AccumuloFileOutputFormat.setIndexBlockSize(job, d);
- AccumuloFileOutputFormat.setCompressionType(job, e);
-
- acuconf = AccumuloFileOutputFormat.getAccumuloConfiguration(job);
-
- assertEquals(17, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
- assertEquals(1300l, acuconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE));
- assertEquals(150l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
- assertEquals(110l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
- assertEquals("lzo", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
-
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormatTest.java
deleted file mode 100644
index 13490e0..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormatTest.java
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.ByteArrayOutputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.user.RegExFilter;
-import org.apache.accumulo.core.iterators.user.WholeRowIterator;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class AccumuloInputFormatTest {
-
- private static final String PREFIX = AccumuloInputFormatTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
- private static final String TEST_TABLE_1 = PREFIX + "_mapred_table_1";
-
- private JobConf job;
-
- @BeforeClass
- public static void setupClass() {
- System.setProperty("hadoop.tmp.dir", System.getProperty("user.dir") + "/target/hadoop-tmp");
- }
-
- @Before
- public void createJob() {
- job = new JobConf();
- }
-
- /**
- * Check that the iterator configuration is getting stored in the Job conf correctly.
- */
- @Test
- public void testSetIterator() throws IOException {
- IteratorSetting is = new IteratorSetting(1, "WholeRow", "org.apache.accumulo.core.iterators.WholeRowIterator");
- AccumuloInputFormat.addIterator(job, is);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- is.write(new DataOutputStream(baos));
- String iterators = job.get("AccumuloInputFormat.ScanOpts.Iterators");
- assertEquals(new String(Base64.encodeBase64(baos.toByteArray())), iterators);
- }
-
- @Test
- public void testAddIterator() throws IOException {
- AccumuloInputFormat.addIterator(job, new IteratorSetting(1, "WholeRow", WholeRowIterator.class));
- AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
- IteratorSetting iter = new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator");
- iter.addOption("v1", "1");
- iter.addOption("junk", "\0omg:!\\xyzzy");
- AccumuloInputFormat.addIterator(job, iter);
-
- List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
-
- // Check the list size
- assertTrue(list.size() == 3);
-
- // Walk the list and make sure our settings are correct
- IteratorSetting setting = list.get(0);
- assertEquals(1, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.user.WholeRowIterator", setting.getIteratorClass());
- assertEquals("WholeRow", setting.getName());
- assertEquals(0, setting.getOptions().size());
-
- setting = list.get(1);
- assertEquals(2, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
- assertEquals("Versions", setting.getName());
- assertEquals(0, setting.getOptions().size());
-
- setting = list.get(2);
- assertEquals(3, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
- assertEquals("Count", setting.getName());
- assertEquals(2, setting.getOptions().size());
- assertEquals("1", setting.getOptions().get("v1"));
- assertEquals("\0omg:!\\xyzzy", setting.getOptions().get("junk"));
- }
-
- /**
- * Test adding iterator options where the keys and values contain both the FIELD_SEPARATOR character (':') and ITERATOR_SEPARATOR (',') characters. There
- * should be no exceptions thrown when trying to parse these types of option entries.
- *
- * This test makes sure that the raw values, as they appear in the Job, match what is expected.
- */
- @Test
- public void testIteratorOptionEncoding() throws Throwable {
- String key = "colon:delimited:key";
- String value = "comma,delimited,value";
- IteratorSetting someSetting = new IteratorSetting(1, "iterator", "Iterator.class");
- someSetting.addOption(key, value);
- AccumuloInputFormat.addIterator(job, someSetting);
-
- List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
- assertEquals(1, list.size());
- assertEquals(1, list.get(0).getOptions().size());
- assertEquals(list.get(0).getOptions().get(key), value);
-
- someSetting.addOption(key + "2", value);
- someSetting.setPriority(2);
- someSetting.setName("it2");
- AccumuloInputFormat.addIterator(job, someSetting);
- list = AccumuloInputFormat.getIterators(job);
- assertEquals(2, list.size());
- assertEquals(1, list.get(0).getOptions().size());
- assertEquals(list.get(0).getOptions().get(key), value);
- assertEquals(2, list.get(1).getOptions().size());
- assertEquals(list.get(1).getOptions().get(key), value);
- assertEquals(list.get(1).getOptions().get(key + "2"), value);
- }
-
- /**
- * Test getting iterator settings when multiple iterators are set.
- */
- @Test
- public void testGetIteratorSettings() throws IOException {
- AccumuloInputFormat.addIterator(job, new IteratorSetting(1, "WholeRow", "org.apache.accumulo.core.iterators.WholeRowIterator"));
- AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
- AccumuloInputFormat.addIterator(job, new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator"));
-
- List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
-
- // Check the list size
- assertTrue(list.size() == 3);
-
- // Walk the list and make sure our settings are correct
- IteratorSetting setting = list.get(0);
- assertEquals(1, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.WholeRowIterator", setting.getIteratorClass());
- assertEquals("WholeRow", setting.getName());
-
- setting = list.get(1);
- assertEquals(2, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
- assertEquals("Versions", setting.getName());
-
- setting = list.get(2);
- assertEquals(3, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
- assertEquals("Count", setting.getName());
-
- }
-
- @Test
- public void testSetRegex() throws IOException {
- String regex = ">\"*%<>\'\\";
-
- IteratorSetting is = new IteratorSetting(50, regex, RegExFilter.class);
- RegExFilter.setRegexs(is, regex, null, null, null, false);
- AccumuloInputFormat.addIterator(job, is);
-
- assertTrue(regex.equals(AccumuloInputFormat.getIterators(job).get(0).getName()));
- }
-
- private static AssertionError e1 = null;
- private static AssertionError e2 = null;
-
- private static class MRTester extends Configured implements Tool {
- private static class TestMapper implements Mapper<Key,Value,Key,Value> {
- Key key = null;
- int count = 0;
-
- @Override
- public void map(Key k, Value v, OutputCollector<Key,Value> output, Reporter reporter) throws IOException {
- try {
- if (key != null)
- assertEquals(key.getRow().toString(), new String(v.get()));
- assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
- assertEquals(new String(v.get()), String.format("%09x", count));
- } catch (AssertionError e) {
- e1 = e;
- }
- key = new Key(k);
- count++;
- }
-
- @Override
- public void configure(JobConf job) {}
-
- @Override
- public void close() throws IOException {
- try {
- assertEquals(100, count);
- } catch (AssertionError e) {
- e2 = e;
- }
- }
-
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 3) {
- throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table>");
- }
-
- String user = args[0];
- String pass = args[1];
- String table = args[2];
-
- JobConf job = new JobConf(getConf());
- job.setJarByClass(this.getClass());
-
- job.setInputFormat(AccumuloInputFormat.class);
-
- AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloInputFormat.setInputTableName(job, table);
- AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setMapperClass(TestMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormat(NullOutputFormat.class);
-
- job.setNumReduceTasks(0);
-
- return JobClient.runJob(job).isSuccessful() ? 0 : 1;
- }
-
- public static void main(String... args) throws Exception {
- assertEquals(0, ToolRunner.run(new Configuration(), new MRTester(), args));
- }
- }
-
- @Test
- public void testMap() throws Exception {
- MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
- Connector c = mockInstance.getConnector("root", new PasswordToken(""));
- c.tableOperations().create(TEST_TABLE_1);
- BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
- for (int i = 0; i < 100; i++) {
- Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
- m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
- bw.addMutation(m);
- }
- bw.close();
-
- MRTester.main("root", "", TEST_TABLE_1);
- assertNull(e1);
- assertNull(e2);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormatTest.java
deleted file mode 100644
index 2864016..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormatTest.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Test;
-
-public class AccumuloMultiTableInputFormatTest {
-
- private static final String PREFIX = AccumuloMultiTableInputFormatTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
- private static final String TEST_TABLE_1 = PREFIX + "_mapred_table_1";
- private static final String TEST_TABLE_2 = PREFIX + "_mapred_table_2";
-
- private static AssertionError e1 = null;
- private static AssertionError e2 = null;
-
- private static class MRTester extends Configured implements Tool {
- private static class TestMapper implements Mapper<Key,Value,Key,Value> {
- Key key = null;
- int count = 0;
-
- @Override
- public void map(Key k, Value v, OutputCollector<Key,Value> output, Reporter reporter) throws IOException {
- try {
- String tableName = ((RangeInputSplit) reporter.getInputSplit()).getTableName();
- if (key != null)
- assertEquals(key.getRow().toString(), new String(v.get()));
- assertEquals(new Text(String.format("%s_%09x", tableName, count + 1)), k.getRow());
- assertEquals(String.format("%s_%09x", tableName, count), new String(v.get()));
- } catch (AssertionError e) {
- e1 = e;
- }
- key = new Key(k);
- count++;
- }
-
- @Override
- public void configure(JobConf job) {}
-
- @Override
- public void close() throws IOException {
- try {
- assertEquals(100, count);
- } catch (AssertionError e) {
- e2 = e;
- }
- }
-
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 4) {
- throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table1> <table2>");
- }
-
- String user = args[0];
- String pass = args[1];
- String table1 = args[2];
- String table2 = args[3];
-
- JobConf job = new JobConf(getConf());
- job.setJarByClass(this.getClass());
-
- job.setInputFormat(AccumuloInputFormat.class);
-
- AccumuloMultiTableInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloMultiTableInputFormat.setMockInstance(job, INSTANCE_NAME);
-
- InputTableConfig tableConfig1 = new InputTableConfig();
- InputTableConfig tableConfig2 = new InputTableConfig();
-
- Map<String,InputTableConfig> configMap = new HashMap<String,InputTableConfig>();
- configMap.put(table1, tableConfig1);
- configMap.put(table2, tableConfig2);
-
- AccumuloMultiTableInputFormat.setInputTableConfigs(job, configMap);
-
- job.setMapperClass(TestMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormat(NullOutputFormat.class);
-
- job.setNumReduceTasks(0);
-
- return JobClient.runJob(job).isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
- }
- }
-
- @Test
- public void testMap() throws Exception {
- MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
- Connector c = mockInstance.getConnector("root", new PasswordToken(""));
- c.tableOperations().create(TEST_TABLE_1);
- c.tableOperations().create(TEST_TABLE_2);
- BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
- BatchWriter bw2 = c.createBatchWriter(TEST_TABLE_2, new BatchWriterConfig());
- for (int i = 0; i < 100; i++) {
- Mutation t1m = new Mutation(new Text(String.format("%s_%09x", TEST_TABLE_1, i + 1)));
- t1m.put(new Text(), new Text(), new Value(String.format("%s_%09x", TEST_TABLE_1, i).getBytes()));
- bw.addMutation(t1m);
- Mutation t2m = new Mutation(new Text(String.format("%s_%09x", TEST_TABLE_2, i + 1)));
- t2m.put(new Text(), new Text(), new Value(String.format("%s_%09x", TEST_TABLE_2, i).getBytes()));
- bw2.addMutation(t2m);
- }
- bw.close();
- bw2.close();
-
- MRTester.main(new String[] {"root", "", TEST_TABLE_1, TEST_TABLE_2});
- assertNull(e1);
- assertNull(e2);
- }
-
- /**
- * Verify {@link org.apache.accumulo.core.client.mapreduce.InputTableConfig} objects get correctly serialized in the JobContext.
- */
- @Test
- public void testTableQueryConfigSerialization() throws IOException {
-
- JobConf job = new JobConf();
-
- InputTableConfig table1 = new InputTableConfig().setRanges(Collections.singletonList(new Range("a", "b")))
- .fetchColumns(Collections.singleton(new Pair<Text,Text>(new Text("CF1"), new Text("CQ1"))))
- .setIterators(Collections.singletonList(new IteratorSetting(50, "iter1", "iterclass1")));
-
- InputTableConfig table2 = new InputTableConfig().setRanges(Collections.singletonList(new Range("a", "b")))
- .fetchColumns(Collections.singleton(new Pair<Text,Text>(new Text("CF1"), new Text("CQ1"))))
- .setIterators(Collections.singletonList(new IteratorSetting(50, "iter1", "iterclass1")));
-
- Map<String,InputTableConfig> configMap = new HashMap<String,InputTableConfig>();
- configMap.put(TEST_TABLE_1, table1);
- configMap.put(TEST_TABLE_2, table2);
- AccumuloMultiTableInputFormat.setInputTableConfigs(job, configMap);
-
- assertEquals(table1, AccumuloMultiTableInputFormat.getInputTableConfig(job, TEST_TABLE_1));
- assertEquals(table2, AccumuloMultiTableInputFormat.getInputTableConfig(job, TEST_TABLE_2));
- }
-}
[02/12] ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/EmptySplitsAccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/EmptySplitsAccumuloInputFormat.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/EmptySplitsAccumuloInputFormat.java
new file mode 100644
index 0000000..dd531c0
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/EmptySplitsAccumuloInputFormat.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+
+/**
+ * AccumuloInputFormat which returns an "empty" RangeInputSplit
+ */
+public class EmptySplitsAccumuloInputFormat extends AccumuloInputFormat {
+
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException {
+ List<InputSplit> oldSplits = super.getSplits(context);
+ List<InputSplit> newSplits = new ArrayList<InputSplit>(oldSplits.size());
+
+ // Copy only the necessary information
+ for (InputSplit oldSplit : oldSplits) {
+ org.apache.accumulo.core.client.mapreduce.RangeInputSplit newSplit = new org.apache.accumulo.core.client.mapreduce.RangeInputSplit(
+ (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) oldSplit);
+ newSplits.add(newSplit);
+ }
+
+ return newSplits;
+ }
+}
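A hedged usage sketch for the test helper above: swapping it in for AccumuloInputFormat forces the record reader to fall back to the job configuration, since the copied splits appear to carry only the core table/range/location fields. The instance, user, and table names are placeholders.

import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.mapreduce.EmptySplitsAccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class EmptySplitsUsageSketch {
  public static Job setup() throws Exception {
    Job job = Job.getInstance(new Configuration(), "empty-splits-sketch");
    // Only getSplits() is overridden, so the usual AccumuloInputFormat
    // configurators still apply.
    job.setInputFormatClass(EmptySplitsAccumuloInputFormat.class);
    AccumuloInputFormat.setConnectorInfo(job, "root", new PasswordToken(""));
    AccumuloInputFormat.setInputTableName(job, "test_table");
    AccumuloInputFormat.setMockInstance(job, "test_instance");
    return job;
  }
}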
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/InputTableConfigTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/InputTableConfigTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/InputTableConfigTest.java
new file mode 100644
index 0000000..7f5c7d8
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/InputTableConfigTest.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.hadoop.io.Text;
+import org.junit.Before;
+import org.junit.Test;
+
+public class InputTableConfigTest {
+
+ private InputTableConfig tableQueryConfig;
+
+ @Before
+ public void setUp() {
+ tableQueryConfig = new InputTableConfig();
+ }
+
+ @Test
+ public void testSerialization_OnlyTable() throws IOException {
+ byte[] serialized = serialize(tableQueryConfig);
+ InputTableConfig actualConfig = deserialize(serialized);
+
+ assertEquals(tableQueryConfig, actualConfig);
+ }
+
+ @Test
+ public void testSerialization_ranges() throws IOException {
+ List<Range> ranges = new ArrayList<Range>();
+ ranges.add(new Range("a", "b"));
+ ranges.add(new Range("c", "d"));
+ tableQueryConfig.setRanges(ranges);
+
+ byte[] serialized = serialize(tableQueryConfig);
+ InputTableConfig actualConfig = deserialize(serialized);
+
+ assertEquals(ranges, actualConfig.getRanges());
+ }
+
+ @Test
+ public void testSerialization_columns() throws IOException {
+ Set<Pair<Text,Text>> columns = new HashSet<Pair<Text,Text>>();
+ columns.add(new Pair<Text,Text>(new Text("cf1"), new Text("cq1")));
+ columns.add(new Pair<Text,Text>(new Text("cf2"), null));
+ tableQueryConfig.fetchColumns(columns);
+
+ byte[] serialized = serialize(tableQueryConfig);
+ InputTableConfig actualConfig = deserialize(serialized);
+
+ assertEquals(actualConfig.getFetchedColumns(), columns);
+ }
+
+ @Test
+ public void testSerialization_iterators() throws IOException {
+ List<IteratorSetting> settings = new ArrayList<IteratorSetting>();
+ settings.add(new IteratorSetting(50, "iter", "iterclass"));
+ settings.add(new IteratorSetting(55, "iter2", "iterclass2"));
+ tableQueryConfig.setIterators(settings);
+ byte[] serialized = serialize(tableQueryConfig);
+ InputTableConfig actualConfig = deserialize(serialized);
+ assertEquals(actualConfig.getIterators(), settings);
+
+ }
+
+ private byte[] serialize(InputTableConfig tableQueryConfig) throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ tableQueryConfig.write(new DataOutputStream(baos));
+ baos.close();
+ return baos.toByteArray();
+ }
+
+ private InputTableConfig deserialize(byte[] bytes) throws IOException {
+ ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
+ InputTableConfig actualConfig = new InputTableConfig(new DataInputStream(bais));
+ bais.close();
+ return actualConfig;
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java
new file mode 100644
index 0000000..80e8c28
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.hadoop.io.Text;
+import org.apache.log4j.Level;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class RangeInputSplitTest {
+
+ @Test
+ public void testSimpleWritable() throws IOException {
+ RangeInputSplit split = new RangeInputSplit("table", "1", new Range(new Key("a"), new Key("b")), new String[]{"localhost"});
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(baos);
+ split.write(dos);
+
+ RangeInputSplit newSplit = new RangeInputSplit();
+
+ ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+ DataInputStream dis = new DataInputStream(bais);
+ newSplit.readFields(dis);
+
+ Assert.assertEquals(split.getTableName(), newSplit.getTableName());
+ Assert.assertEquals(split.getTableId(), newSplit.getTableId());
+ Assert.assertEquals(split.getRange(), newSplit.getRange());
+ Assert.assertTrue(Arrays.equals(split.getLocations(), newSplit.getLocations()));
+ }
+
+ @Test
+ public void testAllFieldsWritable() throws IOException {
+ RangeInputSplit split = new RangeInputSplit("table", "1", new Range(new Key("a"), new Key("b")), new String[]{"localhost"});
+
+ Set<Pair<Text,Text>> fetchedColumns = new HashSet<Pair<Text,Text>>();
+
+ fetchedColumns.add(new Pair<Text,Text>(new Text("colf1"), new Text("colq1")));
+ fetchedColumns.add(new Pair<Text,Text>(new Text("colf2"), new Text("colq2")));
+
+ split.setAuths(new Authorizations("foo"));
+ split.setOffline(true);
+ split.setIsolatedScan(true);
+ split.setUsesLocalIterators(true);
+ split.setFetchedColumns(fetchedColumns);
+ split.setToken(new PasswordToken("password"));
+ split.setPrincipal("root");
+ split.setInstanceName("instance");
+ split.setMockInstance(true);
+ split.setZooKeepers("localhost");
+ split.setLogLevel(Level.WARN);
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(baos);
+ split.write(dos);
+
+ RangeInputSplit newSplit = new RangeInputSplit();
+
+ ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+ DataInputStream dis = new DataInputStream(bais);
+ newSplit.readFields(dis);
+
+ Assert.assertEquals(split.getRange(), newSplit.getRange());
+ Assert.assertArrayEquals(split.getLocations(), newSplit.getLocations());
+
+ Assert.assertEquals(split.getAuths(), newSplit.getAuths());
+ Assert.assertEquals(split.isOffline(), newSplit.isOffline());
+ Assert.assertEquals(split.isIsolatedScan(), newSplit.isIsolatedScan());
+ Assert.assertEquals(split.usesLocalIterators(), newSplit.usesLocalIterators());
+ Assert.assertEquals(split.getFetchedColumns(), newSplit.getFetchedColumns());
+ Assert.assertEquals(split.getToken(), newSplit.getToken());
+ Assert.assertEquals(split.getPrincipal(), newSplit.getPrincipal());
+ Assert.assertEquals(split.getInstanceName(), newSplit.getInstanceName());
+ Assert.assertEquals(split.isMockInstance(), newSplit.isMockInstance());
+ Assert.assertEquals(split.getZooKeepers(), newSplit.getZooKeepers());
+ Assert.assertEquals(split.getLogLevel(), newSplit.getLogLevel());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/TokenFileTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/TokenFileTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/TokenFileTest.java
new file mode 100644
index 0000000..fd207a1
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/TokenFileTest.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.security.Credentials;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+/**
+ *
+ */
+public class TokenFileTest {
+ private static AssertionError e1 = null;
+ private static final String PREFIX = TokenFileTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
+ private static final String TEST_TABLE_1 = PREFIX + "_mapreduce_table_1";
+ private static final String TEST_TABLE_2 = PREFIX + "_mapreduce_table_2";
+
+ private static class MRTokenFileTester extends Configured implements Tool {
+ private static class TestMapper extends Mapper<Key,Value,Text,Mutation> {
+ Key key = null;
+ int count = 0;
+
+ @Override
+ protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
+ try {
+ if (key != null)
+ assertEquals(key.getRow().toString(), new String(v.get()));
+ assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
+ assertEquals(new String(v.get()), String.format("%09x", count));
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ key = new Key(k);
+ count++;
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ Mutation m = new Mutation("total");
+ m.put("", "", Integer.toString(count));
+ context.write(new Text(), m);
+ }
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 4) {
+ throw new IllegalArgumentException("Usage : " + MRTokenFileTester.class.getName() + " <user> <token file> <inputtable> <outputtable>");
+ }
+
+ String user = args[0];
+ String tokenFile = args[1];
+ String table1 = args[2];
+ String table2 = args[3];
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormatClass(AccumuloInputFormat.class);
+
+ AccumuloInputFormat.setConnectorInfo(job, user, tokenFile);
+ AccumuloInputFormat.setInputTableName(job, table1);
+ AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setMapperClass(TestMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormatClass(AccumuloOutputFormat.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(Mutation.class);
+
+ AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
+ AccumuloOutputFormat.setCreateTables(job, false);
+ AccumuloOutputFormat.setDefaultTableName(job, table2);
+ AccumuloOutputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setNumReduceTasks(0);
+
+ job.waitForCompletion(true);
+
+ return job.isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = CachedConfiguration.getInstance();
+ conf.set("hadoop.tmp.dir", new File(args[1]).getParent());
+ assertEquals(0, ToolRunner.run(conf, new MRTokenFileTester(), args));
+ }
+ }
+
+ @Rule
+ public TemporaryFolder folder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
+
+ @Test
+ public void testMR() throws Exception {
+ MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
+ Connector c = mockInstance.getConnector("root", new PasswordToken(""));
+ c.tableOperations().create(TEST_TABLE_1);
+ c.tableOperations().create(TEST_TABLE_2);
+ BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
+ for (int i = 0; i < 100; i++) {
+ Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+ m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+ bw.addMutation(m);
+ }
+ bw.close();
+
+ File tf = folder.newFile("root_test.pw");
+ PrintStream out = new PrintStream(tf);
+ String outString = new Credentials("root", new PasswordToken("")).serialize();
+ out.println(outString);
+ out.close();
+
+ MRTokenFileTester.main(new String[] {"root", tf.getAbsolutePath(), TEST_TABLE_1, TEST_TABLE_2});
+ assertNull(e1);
+
+ Scanner scanner = c.createScanner(TEST_TABLE_2, new Authorizations());
+ Iterator<Entry<Key,Value>> iter = scanner.iterator();
+ assertTrue(iter.hasNext());
+ Entry<Key,Value> entry = iter.next();
+ assertEquals(Integer.parseInt(new String(entry.getValue().get())), 100);
+ assertFalse(iter.hasNext());
+ }
+}
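For orientation, the token-file wiring exercised by this test maps onto a ZooKeeper-backed (non-mock) instance roughly as in the fragment below. This is only a sketch, not part of the patch: the instance name, ZooKeeper quorum, and HDFS path are placeholders, and the imports mirror the test above plus ClientConfiguration.

    @SuppressWarnings("deprecation")
    Job job = new Job(new Configuration(), "token-file-example");
    job.setInputFormatClass(AccumuloInputFormat.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    // Replace the MockInstance used by the test with a real ZooKeeper-backed instance.
    ClientConfiguration clientConf = new ClientConfiguration()
        .withInstance("myInstance").withZkHosts("zkhost1:2181,zkhost2:2181"); // placeholders
    AccumuloInputFormat.setZooKeeperInstance(job, clientConf);
    AccumuloOutputFormat.setZooKeeperInstance(job, clientConf);
    // The token file holds the output of Credentials.serialize(), as written to root_test.pw above,
    // but staged in HDFS so that the tasks can read it.
    AccumuloInputFormat.setConnectorInfo(job, "root", "/user/root/root_test.pw");
    AccumuloOutputFormat.setConnectorInfo(job, "root", "/user/root/root_test.pw");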
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBaseTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBaseTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBaseTest.java
new file mode 100644
index 0000000..1983470
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBaseTest.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce.lib.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.junit.Test;
+
+/**
+ *
+ */
+public class ConfiguratorBaseTest {
+
+ private static enum PrivateTestingEnum {
+ SOMETHING, SOMETHING_ELSE
+ }
+
+ @Test
+ public void testEnumToConfKey() {
+ assertEquals(this.getClass().getSimpleName() + ".PrivateTestingEnum.Something",
+ ConfiguratorBase.enumToConfKey(this.getClass(), PrivateTestingEnum.SOMETHING));
+ assertEquals(this.getClass().getSimpleName() + ".PrivateTestingEnum.SomethingElse",
+ ConfiguratorBase.enumToConfKey(this.getClass(), PrivateTestingEnum.SOMETHING_ELSE));
+ }
+
+ @Test
+ public void testSetConnectorInfoClassOfQConfigurationStringAuthenticationToken() throws AccumuloSecurityException {
+ Configuration conf = new Configuration();
+ assertFalse(ConfiguratorBase.isConnectorInfoSet(this.getClass(), conf));
+ ConfiguratorBase.setConnectorInfo(this.getClass(), conf, "testUser", new PasswordToken("testPassword"));
+ assertTrue(ConfiguratorBase.isConnectorInfoSet(this.getClass(), conf));
+ assertEquals("testUser", ConfiguratorBase.getPrincipal(this.getClass(), conf));
+ AuthenticationToken token = ConfiguratorBase.getAuthenticationToken(this.getClass(), conf);
+ assertEquals(PasswordToken.class, token.getClass());
+ assertEquals(new PasswordToken("testPassword"), token);
+ assertEquals(
+ "inline:" + PasswordToken.class.getName() + ":" + Base64.encodeBase64String(AuthenticationTokenSerializer.serialize(new PasswordToken("testPassword"))),
+ conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.ConnectorInfo.TOKEN)));
+ }
+
+ @Test
+ public void testSetConnectorInfoClassOfQConfigurationStringString() throws AccumuloSecurityException {
+ Configuration conf = new Configuration();
+ assertFalse(ConfiguratorBase.isConnectorInfoSet(this.getClass(), conf));
+ ConfiguratorBase.setConnectorInfo(this.getClass(), conf, "testUser", "testFile");
+ assertTrue(ConfiguratorBase.isConnectorInfoSet(this.getClass(), conf));
+ assertEquals("testUser", ConfiguratorBase.getPrincipal(this.getClass(), conf));
+ assertEquals("file:testFile", conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.ConnectorInfo.TOKEN)));
+ }
+
+ @Test
+ public void testSetZooKeeperInstance() {
+ Configuration conf = new Configuration();
+ ConfiguratorBase.setZooKeeperInstance(this.getClass(), conf, new ClientConfiguration().withInstance("testInstanceName").withZkHosts("testZooKeepers")
+ .withSsl(true).withZkTimeout(1234));
+ ClientConfiguration clientConf = ClientConfiguration.deserialize(conf.get(ConfiguratorBase.enumToConfKey(this.getClass(),
+ ConfiguratorBase.InstanceOpts.CLIENT_CONFIG)));
+ assertEquals("testInstanceName", clientConf.get(ClientProperty.INSTANCE_NAME));
+ assertEquals("testZooKeepers", clientConf.get(ClientProperty.INSTANCE_ZK_HOST));
+ assertEquals("true", clientConf.get(ClientProperty.INSTANCE_RPC_SSL_ENABLED));
+ assertEquals("1234", clientConf.get(ClientProperty.INSTANCE_ZK_TIMEOUT));
+ assertEquals(ZooKeeperInstance.class.getSimpleName(), conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.InstanceOpts.TYPE)));
+
+ Instance instance = ConfiguratorBase.getInstance(this.getClass(), conf);
+ assertEquals(ZooKeeperInstance.class.getName(), instance.getClass().getName());
+ assertEquals("testInstanceName", ((ZooKeeperInstance) instance).getInstanceName());
+ assertEquals("testZooKeepers", ((ZooKeeperInstance) instance).getZooKeepers());
+ assertEquals(1234000, ((ZooKeeperInstance) instance).getZooKeepersSessionTimeOut());
+ }
+
+ @Test
+ public void testSetMockInstance() {
+ Configuration conf = new Configuration();
+ ConfiguratorBase.setMockInstance(this.getClass(), conf, "testInstanceName");
+ assertEquals("testInstanceName", conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.InstanceOpts.NAME)));
+ assertEquals(null, conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.InstanceOpts.ZOO_KEEPERS)));
+ assertEquals(MockInstance.class.getSimpleName(), conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.InstanceOpts.TYPE)));
+ Instance instance = ConfiguratorBase.getInstance(this.getClass(), conf);
+ assertEquals(MockInstance.class.getName(), instance.getClass().getName());
+ }
+
+ @Test
+ public void testSetLogLevel() {
+ Configuration conf = new Configuration();
+ Level currentLevel = Logger.getLogger(this.getClass()).getLevel();
+
+ ConfiguratorBase.setLogLevel(this.getClass(), conf, Level.DEBUG);
+ Logger.getLogger(this.getClass()).setLevel(currentLevel);
+ assertEquals(Level.DEBUG, ConfiguratorBase.getLogLevel(this.getClass(), conf));
+
+ ConfiguratorBase.setLogLevel(this.getClass(), conf, Level.INFO);
+ Logger.getLogger(this.getClass()).setLevel(currentLevel);
+ assertEquals(Level.INFO, ConfiguratorBase.getLogLevel(this.getClass(), conf));
+
+ ConfiguratorBase.setLogLevel(this.getClass(), conf, Level.FATAL);
+ Logger.getLogger(this.getClass()).setLevel(currentLevel);
+ assertEquals(Level.FATAL, ConfiguratorBase.getLogLevel(this.getClass(), conf));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitionerTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitionerTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitionerTest.java
new file mode 100644
index 0000000..8fca169
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitionerTest.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce.lib.partition;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.junit.Test;
+
+public class RangePartitionerTest {
+
+ private static Text[] cutArray = new Text[] {new Text("A"), new Text("B"), new Text("C")};
+
+ @Test
+ public void testNoSubBins() throws IOException {
+ for (int i = -2; i < 2; ++i) {
+ checkExpectedBins(i, new String[] {"A", "B", "C"}, new int[] {0, 1, 2});
+ checkExpectedBins(i, new String[] {"C", "A", "B"}, new int[] {2, 0, 1});
+ checkExpectedBins(i, new String[] {"", "AA", "BB", "CC"}, new int[] {0, 1, 2, 3});
+ }
+ }
+
+ @Test
+ public void testSubBins() throws IOException {
+ checkExpectedRangeBins(2, new String[] {"A", "B", "C"}, new int[] {1, 3, 5});
+ checkExpectedRangeBins(2, new String[] {"C", "A", "B"}, new int[] {5, 1, 3});
+ checkExpectedRangeBins(2, new String[] {"", "AA", "BB", "CC"}, new int[] {1, 3, 5, 7});
+
+ checkExpectedRangeBins(3, new String[] {"A", "B", "C"}, new int[] {2, 5, 8});
+ checkExpectedRangeBins(3, new String[] {"C", "A", "B"}, new int[] {8, 2, 5});
+ checkExpectedRangeBins(3, new String[] {"", "AA", "BB", "CC"}, new int[] {2, 5, 8, 11});
+
+ checkExpectedRangeBins(10, new String[] {"A", "B", "C"}, new int[] {9, 19, 29});
+ checkExpectedRangeBins(10, new String[] {"C", "A", "B"}, new int[] {29, 9, 19});
+ checkExpectedRangeBins(10, new String[] {"", "AA", "BB", "CC"}, new int[] {9, 19, 29, 39});
+ }
+
+ private RangePartitioner prepPartitioner(int numSubBins) throws IOException {
+ @SuppressWarnings("deprecation")
+ Job job = new Job();
+ RangePartitioner.setNumSubBins(job, numSubBins);
+ RangePartitioner rp = new RangePartitioner();
+ rp.setConf(job.getConfiguration());
+ return rp;
+ }
+
+ private void checkExpectedRangeBins(int numSubBins, String[] strings, int[] rangeEnds) throws IOException {
+ assertTrue(strings.length == rangeEnds.length);
+ for (int i = 0; i < strings.length; ++i) {
+ int endRange = rangeEnds[i];
+ int startRange = endRange + 1 - numSubBins;
+ int part = prepPartitioner(numSubBins).findPartition(new Text(strings[i]), cutArray, numSubBins);
+ assertTrue(part >= startRange);
+ assertTrue(part <= endRange);
+ }
+ }
+
+ private void checkExpectedBins(int numSubBins, String[] strings, int[] bins) throws IOException {
+ assertTrue(strings.length == bins.length);
+ for (int i = 0; i < strings.length; ++i) {
+ int bin = bins[i], part = prepPartitioner(numSubBins).findPartition(new Text(strings[i]), cutArray, numSubBins);
+ assertTrue(bin == part);
+ }
+ }
+}
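For orientation, a rough sketch of how this partitioner is typically attached to a job follows. Only setNumSubBins and findPartition appear in this patch; the setSplitFile helper and the splits path are assumptions, and the reducer count simply mirrors the bin layout exercised by the test above.

    int numSubBins = 3;
    int numCutPoints = 3; // e.g. the "A", "B", "C" cut points used above
    @SuppressWarnings("deprecation")
    Job job = new Job(new Configuration(), "range-partitioned-job");
    job.setPartitionerClass(RangePartitioner.class);
    RangePartitioner.setSplitFile(job, "/tmp/splits.txt"); // assumed helper; placeholder file of sorted cut points
    RangePartitioner.setNumSubBins(job, numSubBins);
    // One reducer per bin: numSubBins sub-bins for each of the (numCutPoints + 1) ranges.
    job.setNumReduceTasks(numSubBins * (numCutPoints + 1));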
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/resources/log4j.properties b/mapreduce/src/test/resources/log4j.properties
new file mode 100644
index 0000000..9f968f8
--- /dev/null
+++ b/mapreduce/src/test/resources/log4j.properties
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootLogger=INFO, CA
+log4j.appender.CA=org.apache.log4j.ConsoleAppender
+log4j.appender.CA.layout=org.apache.log4j.PatternLayout
+log4j.appender.CA.layout.ConversionPattern=[%t] %-5p %c %x - %m%n
+
+log4j.logger.org.apache.accumulo.core.iterators.system.VisibilityFilter=FATAL
+log4j.logger.org.apache.accumulo.core.iterators.user.TransformingIteratorTest$IllegalVisCompactionKeyTransformingIterator=FATAL
+log4j.logger.org.apache.accumulo.core.iterators.user.TransformingIteratorTest$IllegalVisKeyTransformingIterator=FATAL
+log4j.logger.org.apache.commons.vfs2.impl.DefaultFileSystemManager=WARN
+log4j.logger.org.apache.hadoop.mapred=ERROR
+log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter=ERROR
+log4j.logger.org.apache.hadoop.util.ProcessTree=ERROR
+log4j.logger.org.apache.accumulo.core.util.format=FATAL
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1529d14..3a0953b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -75,6 +75,7 @@
<module>trace</module>
<module>core</module>
<module>shell</module>
+ <module>mapreduce</module>
<module>fate</module>
<module>start</module>
<module>examples/simple</module>
@@ -248,6 +249,11 @@
</dependency>
<dependency>
<groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-mapreduce</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-master</artifactId>
<version>${project.version}</version>
</dependency>
[07/12] ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java
deleted file mode 100644
index 80e8c28..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplitTest.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.hadoop.io.Text;
-import org.apache.log4j.Level;
-import org.junit.Assert;
-import org.junit.Test;
-
-public class RangeInputSplitTest {
-
- @Test
- public void testSimpleWritable() throws IOException {
- RangeInputSplit split = new RangeInputSplit("table", "1", new Range(new Key("a"), new Key("b")), new String[]{"localhost"});
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(baos);
- split.write(dos);
-
- RangeInputSplit newSplit = new RangeInputSplit();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
- DataInputStream dis = new DataInputStream(bais);
- newSplit.readFields(dis);
-
- Assert.assertEquals(split.getTableName(), newSplit.getTableName());
- Assert.assertEquals(split.getTableId(), newSplit.getTableId());
- Assert.assertEquals(split.getRange(), newSplit.getRange());
- Assert.assertTrue(Arrays.equals(split.getLocations(), newSplit.getLocations()));
- }
-
- @Test
- public void testAllFieldsWritable() throws IOException {
- RangeInputSplit split = new RangeInputSplit("table", "1", new Range(new Key("a"), new Key("b")), new String[]{"localhost"});
-
- Set<Pair<Text,Text>> fetchedColumns = new HashSet<Pair<Text,Text>>();
-
- fetchedColumns.add(new Pair<Text,Text>(new Text("colf1"), new Text("colq1")));
- fetchedColumns.add(new Pair<Text,Text>(new Text("colf2"), new Text("colq2")));
-
- split.setAuths(new Authorizations("foo"));
- split.setOffline(true);
- split.setIsolatedScan(true);
- split.setUsesLocalIterators(true);
- split.setFetchedColumns(fetchedColumns);
- split.setToken(new PasswordToken("password"));
- split.setPrincipal("root");
- split.setInstanceName("instance");
- split.setMockInstance(true);
- split.setZooKeepers("localhost");
- split.setLogLevel(Level.WARN);
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(baos);
- split.write(dos);
-
- RangeInputSplit newSplit = new RangeInputSplit();
-
- ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
- DataInputStream dis = new DataInputStream(bais);
- newSplit.readFields(dis);
-
- Assert.assertEquals(split.getRange(), newSplit.getRange());
- Assert.assertArrayEquals(split.getLocations(), newSplit.getLocations());
-
- Assert.assertEquals(split.getAuths(), newSplit.getAuths());
- Assert.assertEquals(split.isOffline(), newSplit.isOffline());
- Assert.assertEquals(split.isIsolatedScan(), newSplit.isOffline());
- Assert.assertEquals(split.usesLocalIterators(), newSplit.usesLocalIterators());
- Assert.assertEquals(split.getFetchedColumns(), newSplit.getFetchedColumns());
- Assert.assertEquals(split.getToken(), newSplit.getToken());
- Assert.assertEquals(split.getPrincipal(), newSplit.getPrincipal());
- Assert.assertEquals(split.getInstanceName(), newSplit.getInstanceName());
- Assert.assertEquals(split.isMockInstance(), newSplit.isMockInstance());
- Assert.assertEquals(split.getZooKeepers(), newSplit.getZooKeepers());
- Assert.assertEquals(split.getLogLevel(), newSplit.getLogLevel());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/TokenFileTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/TokenFileTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/TokenFileTest.java
deleted file mode 100644
index fd207a1..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/TokenFileTest.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Iterator;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.security.Credentials;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-/**
- *
- */
-public class TokenFileTest {
- private static AssertionError e1 = null;
- private static final String PREFIX = TokenFileTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
- private static final String TEST_TABLE_1 = PREFIX + "_mapreduce_table_1";
- private static final String TEST_TABLE_2 = PREFIX + "_mapreduce_table_2";
-
- private static class MRTokenFileTester extends Configured implements Tool {
- private static class TestMapper extends Mapper<Key,Value,Text,Mutation> {
- Key key = null;
- int count = 0;
-
- @Override
- protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
- try {
- if (key != null)
- assertEquals(key.getRow().toString(), new String(v.get()));
- assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
- assertEquals(new String(v.get()), String.format("%09x", count));
- } catch (AssertionError e) {
- e1 = e;
- }
- key = new Key(k);
- count++;
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- Mutation m = new Mutation("total");
- m.put("", "", Integer.toString(count));
- context.write(new Text(), m);
- }
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 4) {
- throw new IllegalArgumentException("Usage : " + MRTokenFileTester.class.getName() + " <user> <token file> <inputtable> <outputtable>");
- }
-
- String user = args[0];
- String tokenFile = args[1];
- String table1 = args[2];
- String table2 = args[3];
-
- @SuppressWarnings("deprecation")
- Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
- job.setJarByClass(this.getClass());
-
- job.setInputFormatClass(AccumuloInputFormat.class);
-
- AccumuloInputFormat.setConnectorInfo(job, user, tokenFile);
- AccumuloInputFormat.setInputTableName(job, table1);
- AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setMapperClass(TestMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormatClass(AccumuloOutputFormat.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Mutation.class);
-
- AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
- AccumuloOutputFormat.setCreateTables(job, false);
- AccumuloOutputFormat.setDefaultTableName(job, table2);
- AccumuloOutputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setNumReduceTasks(0);
-
- job.waitForCompletion(true);
-
- return job.isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- Configuration conf = CachedConfiguration.getInstance();
- conf.set("hadoop.tmp.dir", new File(args[1]).getParent());
- assertEquals(0, ToolRunner.run(conf, new MRTokenFileTester(), args));
- }
- }
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
-
- @Test
- public void testMR() throws Exception {
- MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
- Connector c = mockInstance.getConnector("root", new PasswordToken(""));
- c.tableOperations().create(TEST_TABLE_1);
- c.tableOperations().create(TEST_TABLE_2);
- BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
- for (int i = 0; i < 100; i++) {
- Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
- m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
- bw.addMutation(m);
- }
- bw.close();
-
- File tf = folder.newFile("root_test.pw");
- PrintStream out = new PrintStream(tf);
- String outString = new Credentials("root", new PasswordToken("")).serialize();
- out.println(outString);
- out.close();
-
- MRTokenFileTester.main(new String[] {"root", tf.getAbsolutePath(), TEST_TABLE_1, TEST_TABLE_2});
- assertNull(e1);
-
- Scanner scanner = c.createScanner(TEST_TABLE_2, new Authorizations());
- Iterator<Entry<Key,Value>> iter = scanner.iterator();
- assertTrue(iter.hasNext());
- Entry<Key,Value> entry = iter.next();
- assertEquals(Integer.parseInt(new String(entry.getValue().get())), 100);
- assertFalse(iter.hasNext());
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBaseTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBaseTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBaseTest.java
deleted file mode 100644
index 1983470..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBaseTest.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce.lib.impl;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty;
-import org.apache.accumulo.core.client.Instance;
-import org.apache.accumulo.core.client.ZooKeeperInstance;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.junit.Test;
-
-/**
- *
- */
-public class ConfiguratorBaseTest {
-
- private static enum PrivateTestingEnum {
- SOMETHING, SOMETHING_ELSE
- }
-
- @Test
- public void testEnumToConfKey() {
- assertEquals(this.getClass().getSimpleName() + ".PrivateTestingEnum.Something",
- ConfiguratorBase.enumToConfKey(this.getClass(), PrivateTestingEnum.SOMETHING));
- assertEquals(this.getClass().getSimpleName() + ".PrivateTestingEnum.SomethingElse",
- ConfiguratorBase.enumToConfKey(this.getClass(), PrivateTestingEnum.SOMETHING_ELSE));
- }
-
- @Test
- public void testSetConnectorInfoClassOfQConfigurationStringAuthenticationToken() throws AccumuloSecurityException {
- Configuration conf = new Configuration();
- assertFalse(ConfiguratorBase.isConnectorInfoSet(this.getClass(), conf));
- ConfiguratorBase.setConnectorInfo(this.getClass(), conf, "testUser", new PasswordToken("testPassword"));
- assertTrue(ConfiguratorBase.isConnectorInfoSet(this.getClass(), conf));
- assertEquals("testUser", ConfiguratorBase.getPrincipal(this.getClass(), conf));
- AuthenticationToken token = ConfiguratorBase.getAuthenticationToken(this.getClass(), conf);
- assertEquals(PasswordToken.class, token.getClass());
- assertEquals(new PasswordToken("testPassword"), token);
- assertEquals(
- "inline:" + PasswordToken.class.getName() + ":" + Base64.encodeBase64String(AuthenticationTokenSerializer.serialize(new PasswordToken("testPassword"))),
- conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.ConnectorInfo.TOKEN)));
- }
-
- @Test
- public void testSetConnectorInfoClassOfQConfigurationStringString() throws AccumuloSecurityException {
- Configuration conf = new Configuration();
- assertFalse(ConfiguratorBase.isConnectorInfoSet(this.getClass(), conf));
- ConfiguratorBase.setConnectorInfo(this.getClass(), conf, "testUser", "testFile");
- assertTrue(ConfiguratorBase.isConnectorInfoSet(this.getClass(), conf));
- assertEquals("testUser", ConfiguratorBase.getPrincipal(this.getClass(), conf));
- assertEquals("file:testFile", conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.ConnectorInfo.TOKEN)));
- }
-
- @Test
- public void testSetZooKeeperInstance() {
- Configuration conf = new Configuration();
- ConfiguratorBase.setZooKeeperInstance(this.getClass(), conf, new ClientConfiguration().withInstance("testInstanceName").withZkHosts("testZooKeepers")
- .withSsl(true).withZkTimeout(1234));
- ClientConfiguration clientConf = ClientConfiguration.deserialize(conf.get(ConfiguratorBase.enumToConfKey(this.getClass(),
- ConfiguratorBase.InstanceOpts.CLIENT_CONFIG)));
- assertEquals("testInstanceName", clientConf.get(ClientProperty.INSTANCE_NAME));
- assertEquals("testZooKeepers", clientConf.get(ClientProperty.INSTANCE_ZK_HOST));
- assertEquals("true", clientConf.get(ClientProperty.INSTANCE_RPC_SSL_ENABLED));
- assertEquals("1234", clientConf.get(ClientProperty.INSTANCE_ZK_TIMEOUT));
- assertEquals(ZooKeeperInstance.class.getSimpleName(), conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.InstanceOpts.TYPE)));
-
- Instance instance = ConfiguratorBase.getInstance(this.getClass(), conf);
- assertEquals(ZooKeeperInstance.class.getName(), instance.getClass().getName());
- assertEquals("testInstanceName", ((ZooKeeperInstance) instance).getInstanceName());
- assertEquals("testZooKeepers", ((ZooKeeperInstance) instance).getZooKeepers());
- assertEquals(1234000, ((ZooKeeperInstance) instance).getZooKeepersSessionTimeOut());
- }
-
- @Test
- public void testSetMockInstance() {
- Configuration conf = new Configuration();
- ConfiguratorBase.setMockInstance(this.getClass(), conf, "testInstanceName");
- assertEquals("testInstanceName", conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.InstanceOpts.NAME)));
- assertEquals(null, conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.InstanceOpts.ZOO_KEEPERS)));
- assertEquals(MockInstance.class.getSimpleName(), conf.get(ConfiguratorBase.enumToConfKey(this.getClass(), ConfiguratorBase.InstanceOpts.TYPE)));
- Instance instance = ConfiguratorBase.getInstance(this.getClass(), conf);
- assertEquals(MockInstance.class.getName(), instance.getClass().getName());
- }
-
- @Test
- public void testSetLogLevel() {
- Configuration conf = new Configuration();
- Level currentLevel = Logger.getLogger(this.getClass()).getLevel();
-
- ConfiguratorBase.setLogLevel(this.getClass(), conf, Level.DEBUG);
- Logger.getLogger(this.getClass()).setLevel(currentLevel);
- assertEquals(Level.DEBUG, ConfiguratorBase.getLogLevel(this.getClass(), conf));
-
- ConfiguratorBase.setLogLevel(this.getClass(), conf, Level.INFO);
- Logger.getLogger(this.getClass()).setLevel(currentLevel);
- assertEquals(Level.INFO, ConfiguratorBase.getLogLevel(this.getClass(), conf));
-
- ConfiguratorBase.setLogLevel(this.getClass(), conf, Level.FATAL);
- Logger.getLogger(this.getClass()).setLevel(currentLevel);
- assertEquals(Level.FATAL, ConfiguratorBase.getLogLevel(this.getClass(), conf));
- }
-
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitionerTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitionerTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitionerTest.java
deleted file mode 100644
index 8fca169..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitionerTest.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce.lib.partition;
-
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.junit.Test;
-
-public class RangePartitionerTest {
-
- private static Text[] cutArray = new Text[] {new Text("A"), new Text("B"), new Text("C")};
-
- @Test
- public void testNoSubBins() throws IOException {
- for (int i = -2; i < 2; ++i) {
- checkExpectedBins(i, new String[] {"A", "B", "C"}, new int[] {0, 1, 2});
- checkExpectedBins(i, new String[] {"C", "A", "B"}, new int[] {2, 0, 1});
- checkExpectedBins(i, new String[] {"", "AA", "BB", "CC"}, new int[] {0, 1, 2, 3});
- }
- }
-
- @Test
- public void testSubBins() throws IOException {
- checkExpectedRangeBins(2, new String[] {"A", "B", "C"}, new int[] {1, 3, 5});
- checkExpectedRangeBins(2, new String[] {"C", "A", "B"}, new int[] {5, 1, 3});
- checkExpectedRangeBins(2, new String[] {"", "AA", "BB", "CC"}, new int[] {1, 3, 5, 7});
-
- checkExpectedRangeBins(3, new String[] {"A", "B", "C"}, new int[] {2, 5, 8});
- checkExpectedRangeBins(3, new String[] {"C", "A", "B"}, new int[] {8, 2, 5});
- checkExpectedRangeBins(3, new String[] {"", "AA", "BB", "CC"}, new int[] {2, 5, 8, 11});
-
- checkExpectedRangeBins(10, new String[] {"A", "B", "C"}, new int[] {9, 19, 29});
- checkExpectedRangeBins(10, new String[] {"C", "A", "B"}, new int[] {29, 9, 19});
- checkExpectedRangeBins(10, new String[] {"", "AA", "BB", "CC"}, new int[] {9, 19, 29, 39});
- }
-
- private RangePartitioner prepPartitioner(int numSubBins) throws IOException {
- @SuppressWarnings("deprecation")
- Job job = new Job();
- RangePartitioner.setNumSubBins(job, numSubBins);
- RangePartitioner rp = new RangePartitioner();
- rp.setConf(job.getConfiguration());
- return rp;
- }
-
- private void checkExpectedRangeBins(int numSubBins, String[] strings, int[] rangeEnds) throws IOException {
- assertTrue(strings.length == rangeEnds.length);
- for (int i = 0; i < strings.length; ++i) {
- int endRange = rangeEnds[i];
- int startRange = endRange + 1 - numSubBins;
- int part = prepPartitioner(numSubBins).findPartition(new Text(strings[i]), cutArray, numSubBins);
- assertTrue(part >= startRange);
- assertTrue(part <= endRange);
- }
- }
-
- private void checkExpectedBins(int numSubBins, String[] strings, int[] bins) throws IOException {
- assertTrue(strings.length == bins.length);
- for (int i = 0; i < strings.length; ++i) {
- int bin = bins[i], part = prepPartitioner(numSubBins).findPartition(new Text(strings[i]), cutArray, numSubBins);
- assertTrue(bin == part);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/examples/simple/pom.xml
----------------------------------------------------------------------
diff --git a/examples/simple/pom.xml b/examples/simple/pom.xml
index 8390d01..37adc00 100644
--- a/examples/simple/pom.xml
+++ b/examples/simple/pom.xml
@@ -61,6 +61,10 @@
</dependency>
<dependency>
<groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-shell</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/pom.xml
----------------------------------------------------------------------
diff --git a/mapreduce/pom.xml b/mapreduce/pom.xml
new file mode 100644
index 0000000..c9cb110
--- /dev/null
+++ b/mapreduce/pom.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-project</artifactId>
+ <version>1.7.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>accumulo-mapreduce</artifactId>
+ <name>MapReduce</name>
+ <description>The Map Reduce bindings for accessing Apache Accumulo.</description>
+ <dependencies>
+ <dependency>
+ <groupId>com.beust</groupId>
+ <artifactId>jcommander</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-configuration</groupId>
+ <artifactId>commons-configuration</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java b/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java
new file mode 100644
index 0000000..e7a3dd4
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.cli;
+
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.hadoop.mapreduce.Job;
+
+import com.beust.jcommander.Parameter;
+
+public class MapReduceClientOnDefaultTable extends MapReduceClientOpts {
+ @Parameter(names = "--table", description = "table to use")
+ public String tableName;
+
+ public MapReduceClientOnDefaultTable(String table) {
+ this.tableName = table;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ @Override
+ public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
+ super.setAccumuloConfigs(job);
+ AccumuloInputFormat.setConnectorInfo(job, principal, getToken());
+ AccumuloInputFormat.setInputTableName(job, getTableName());
+ AccumuloInputFormat.setScanAuthorizations(job, auths);
+ AccumuloOutputFormat.setConnectorInfo(job, principal, getToken());
+ AccumuloOutputFormat.setCreateTables(job, true);
+ AccumuloOutputFormat.setDefaultTableName(job, getTableName());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java b/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java
new file mode 100644
index 0000000..abfc17d
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.cli;
+
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.hadoop.mapreduce.Job;
+
+import com.beust.jcommander.Parameter;
+
+public class MapReduceClientOnRequiredTable extends MapReduceClientOpts {
+ @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
+ private String tableName;
+
+  @Parameter(names = {"-tf", "--tokenFile"}, description = "File in HDFS containing the user's authentication token, created with \"bin/accumulo create-token\"")
+ private String tokenFile = "";
+
+ @Override
+ public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
+ super.setAccumuloConfigs(job);
+
+ if (tokenFile.isEmpty()) {
+ AccumuloInputFormat.setConnectorInfo(job, principal, getToken());
+ AccumuloOutputFormat.setConnectorInfo(job, principal, getToken());
+ } else {
+ AccumuloInputFormat.setConnectorInfo(job, principal, tokenFile);
+ AccumuloOutputFormat.setConnectorInfo(job, principal, tokenFile);
+ }
+ AccumuloInputFormat.setInputTableName(job, getTableName());
+ AccumuloInputFormat.setScanAuthorizations(job, auths);
+ AccumuloOutputFormat.setCreateTables(job, true);
+ AccumuloOutputFormat.setDefaultTableName(job, getTableName());
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+}
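As a rough sketch of how a job driver might consume this new opts class: the tool and mapper names below are hypothetical, the parseArgs helper is assumed to be inherited from ClientOpts, and the input/output wiring simply mirrors TokenFileTest above.

    import java.io.IOException;

    import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
    import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
    import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Mutation;
    import org.apache.accumulo.core.data.Value;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    public class MyTableTool extends Configured implements Tool {

      // Copies each scanned entry back out as a mutation on the default output table.
      static class CopyMapper extends Mapper<Key,Value,Text,Mutation> {
        @Override
        protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
          Mutation m = new Mutation(k.getRow());
          m.put(k.getColumnFamily().toString(), k.getColumnQualifier().toString(), new String(v.get()));
          context.write(new Text(), m); // empty key routes to the default table, as in TokenFileTest
        }
      }

      @Override
      public int run(String[] args) throws Exception {
        MapReduceClientOnRequiredTable opts = new MapReduceClientOnRequiredTable();
        opts.parseArgs(MyTableTool.class.getName(), args); // assumed jcommander-backed helper from ClientOpts

        @SuppressWarnings("deprecation")
        Job job = new Job(getConf(), MyTableTool.class.getSimpleName());
        job.setJarByClass(MyTableTool.class);
        job.setInputFormatClass(AccumuloInputFormat.class);
        job.setMapperClass(CopyMapper.class);
        job.setOutputFormatClass(AccumuloOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Mutation.class);
        job.setNumReduceTasks(0);

        // One call wires connector info (password or --tokenFile), input table, scan auths,
        // and the default output table into both formats.
        opts.setAccumuloConfigs(job);

        return job.waitForCompletion(true) ? 0 : 1;
      }

      public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new MyTableTool(), args));
      }
    }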
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java b/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java
new file mode 100644
index 0000000..4b3b7ed
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.cli;
+
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * Adds some MR awareness to the ClientOpts
+ */
+public class MapReduceClientOpts extends ClientOpts {
+ public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
+ AccumuloInputFormat.setZooKeeperInstance(job, this.getClientConfiguration());
+ AccumuloOutputFormat.setZooKeeperInstance(job, this.getClientConfiguration());
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
new file mode 100644
index 0000000..5af78d2
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
@@ -0,0 +1,607 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.ClientSideIteratorScanner;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.IsolatedScanner;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableDeletedException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.TableOfflineException;
+import org.apache.accumulo.core.client.impl.OfflineScanner;
+import org.apache.accumulo.core.client.impl.ScannerImpl;
+import org.apache.accumulo.core.client.impl.Tables;
+import org.apache.accumulo.core.client.impl.TabletLocator;
+import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.KeyExtent;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.master.state.tables.TableState;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.security.Credentials;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.accumulo.core.util.UtilWaitThread;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+/**
+ * An abstract input format to provide shared methods common to all other input format classes. At the very least, any classes inheriting from this class will
+ * need to define their own {@link RecordReader}.
+ */
+public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
+ protected static final Class<?> CLASS = AccumuloInputFormat.class;
+ protected static final Logger log = Logger.getLogger(CLASS);
+
+ /**
+ * Sets the connector information needed to communicate with Accumulo in this job.
+ *
+ * <p>
+ * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe
+ * conversion to a string, and is not intended to be secure.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param principal
+ * a valid Accumulo user name (user must have Table.CREATE permission)
+ * @param token
+   *          the user's authentication token
+ * @since 1.5.0
+ */
+ public static void setConnectorInfo(JobConf job, String principal, AuthenticationToken token) throws AccumuloSecurityException {
+ InputConfigurator.setConnectorInfo(CLASS, job, principal, token);
+ }
+
+ /**
+ * Sets the connector information needed to communicate with Accumulo in this job.
+ *
+ * <p>
+ * Stores the password in a file in HDFS and pulls that into the Distributed Cache in an attempt to be more secure than storing it in the Configuration.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param principal
+ * a valid Accumulo user name (user must have Table.CREATE permission)
+ * @param tokenFile
+ * the path to the token file
+ * @since 1.6.0
+ */
+ public static void setConnectorInfo(JobConf job, String principal, String tokenFile) throws AccumuloSecurityException {
+ InputConfigurator.setConnectorInfo(CLASS, job, principal, tokenFile);
+ }
+
+ /**
+ * Determines if the connector has been configured.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return true if the connector has been configured, false otherwise
+ * @since 1.5.0
+ * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
+ */
+ protected static Boolean isConnectorInfoSet(JobConf job) {
+ return InputConfigurator.isConnectorInfoSet(CLASS, job);
+ }
+
+ /**
+ * Gets the user name from the configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return the user name
+ * @since 1.5.0
+ * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
+ */
+ protected static String getPrincipal(JobConf job) {
+ return InputConfigurator.getPrincipal(CLASS, job);
+ }
+
+ /**
+ * Gets the authenticated token from either the specified token file or directly from the configuration, whichever was used when the job was configured.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return the principal's authentication token
+ * @since 1.6.0
+ * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
+ * @see #setConnectorInfo(JobConf, String, String)
+ */
+ protected static AuthenticationToken getAuthenticationToken(JobConf job) {
+ return InputConfigurator.getAuthenticationToken(CLASS, job);
+ }
+
+ /**
+ * Configures a {@link org.apache.accumulo.core.client.ZooKeeperInstance} for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param clientConfig
+ * client configuration containing connection options
+ * @since 1.6.0
+ */
+ public static void setZooKeeperInstance(JobConf job, ClientConfiguration clientConfig) {
+ InputConfigurator.setZooKeeperInstance(CLASS, job, clientConfig);
+ }
+
+ /**
+ * Configures a {@link org.apache.accumulo.core.client.mock.MockInstance} for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param instanceName
+ * the Accumulo instance name
+ * @since 1.5.0
+ */
+ public static void setMockInstance(JobConf job, String instanceName) {
+ InputConfigurator.setMockInstance(CLASS, job, instanceName);
+ }
+
+ /**
+ * Initializes an Accumulo {@link org.apache.accumulo.core.client.Instance} based on the configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return an Accumulo instance
+ * @since 1.5.0
+ * @see #setZooKeeperInstance(JobConf, ClientConfiguration)
+ * @see #setMockInstance(JobConf, String)
+ */
+ protected static Instance getInstance(JobConf job) {
+ return InputConfigurator.getInstance(CLASS, job);
+ }
+
+ /**
+ * Sets the log level for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param level
+ * the logging level
+ * @since 1.5.0
+ */
+ public static void setLogLevel(JobConf job, Level level) {
+ InputConfigurator.setLogLevel(CLASS, job, level);
+ }
+
+ /**
+ * Gets the log level from this configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return the log level
+ * @since 1.5.0
+ * @see #setLogLevel(JobConf, Level)
+ */
+ protected static Level getLogLevel(JobConf job) {
+ return InputConfigurator.getLogLevel(CLASS, job);
+ }
+
+ /**
+ * Sets the {@link org.apache.accumulo.core.security.Authorizations} used to scan. Must be a subset of the user's authorizations. Defaults to the empty set.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param auths
+ * the user's authorizations
+ * @since 1.5.0
+ */
+ public static void setScanAuthorizations(JobConf job, Authorizations auths) {
+ InputConfigurator.setScanAuthorizations(CLASS, job, auths);
+ }
+
+ /**
+ * Gets the authorizations to set for the scans from the configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return the Accumulo scan authorizations
+ * @since 1.5.0
+ * @see #setScanAuthorizations(JobConf, Authorizations)
+ */
+ protected static Authorizations getScanAuthorizations(JobConf job) {
+ return InputConfigurator.getScanAuthorizations(CLASS, job);
+ }
+
+ /**
+ * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return an Accumulo tablet locator
+ * @throws org.apache.accumulo.core.client.TableNotFoundException
+ * if the table name set on the configuration doesn't exist
+ * @since 1.6.0
+ */
+ protected static TabletLocator getTabletLocator(JobConf job, String tableId) throws TableNotFoundException {
+ return InputConfigurator.getTabletLocator(CLASS, job, tableId);
+ }
+
+ // InputFormat doesn't have the equivalent of OutputFormat's checkOutputSpecs(JobContext job)
+ /**
+ * Check whether a configuration is fully configured to be used with an Accumulo {@link InputFormat}.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @throws java.io.IOException
+ * if the context is improperly configured
+ * @since 1.5.0
+ */
+ protected static void validateOptions(JobConf job) throws IOException {
+ InputConfigurator.validateOptions(CLASS, job);
+ }
+
+ /**
+ * Fetches all {@link InputTableConfig}s that have been set on the given Hadoop job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @return the {@link InputTableConfig} objects set on the job
+ * @since 1.6.0
+ */
+ public static Map<String,InputTableConfig> getInputTableConfigs(JobConf job) {
+ return InputConfigurator.getInputTableConfigs(CLASS, job);
+ }
+
+ /**
+ * Fetches a {@link InputTableConfig} that has been set on the configuration for a specific table.
+ *
+ * <p>
+ * null is returned in the event that the table doesn't exist.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param tableName
+ * the table name for which to grab the config object
+ * @return the {@link InputTableConfig} for the given table
+ * @since 1.6.0
+ */
+ public static InputTableConfig getInputTableConfig(JobConf job, String tableName) {
+ return InputConfigurator.getInputTableConfig(CLASS, job, tableName);
+ }
+
+ /**
+ * An abstract base class to be used to create {@link org.apache.hadoop.mapred.RecordReader} instances that convert from Accumulo
+ * {@link org.apache.accumulo.core.data.Key}/{@link org.apache.accumulo.core.data.Value} pairs to the user's K/V types.
+ *
+ * Subclasses must implement {@link #next(Object, Object)} to update key and value, and also to update the following variables:
+ * <ul>
+ * <li>Key {@link #currentKey} (used for progress reporting)</li>
+ * <li>long {@link #numKeysRead} (used for progress reporting)</li>
+ * </ul>
+ */
+ protected abstract static class AbstractRecordReader<K,V> implements RecordReader<K,V> {
+ protected long numKeysRead;
+ protected Iterator<Map.Entry<Key,Value>> scannerIterator;
+ protected RangeInputSplit split;
+
+ /**
+ * Configures the iterators on a scanner for the given table name.
+ *
+ * @param job
+ * the Hadoop job configuration
+ * @param scanner
+ * the scanner for which to configure the iterators
+ * @param tableName
+ * the table name for which the scanner is configured
+ * @since 1.6.0
+ */
+ protected abstract void setupIterators(JobConf job, Scanner scanner, String tableName, RangeInputSplit split);
+
+ /**
+ * Initialize a scanner over the given input split using this task attempt configuration.
+ */
+ public void initialize(InputSplit inSplit, JobConf job) throws IOException {
+ Scanner scanner;
+ split = (RangeInputSplit) inSplit;
+ log.debug("Initializing input split: " + split.getRange());
+
+ Instance instance = split.getInstance();
+ if (null == instance) {
+ instance = getInstance(job);
+ }
+
+ String principal = split.getPrincipal();
+ if (null == principal) {
+ principal = getPrincipal(job);
+ }
+
+ AuthenticationToken token = split.getToken();
+ if (null == token) {
+ token = getAuthenticationToken(job);
+ }
+
+ Authorizations authorizations = split.getAuths();
+ if (null == authorizations) {
+ authorizations = getScanAuthorizations(job);
+ }
+
+ String table = split.getTableName();
+
+ // in case the table name changed, we can still use the previous name for configuration purposes,
+ // but the scanner will use the table id resolved at job setup time
+ InputTableConfig tableConfig = getInputTableConfig(job, split.getTableName());
+
+ Boolean isOffline = split.isOffline();
+ if (null == isOffline) {
+ isOffline = tableConfig.isOfflineScan();
+ }
+
+ Boolean isIsolated = split.isIsolatedScan();
+ if (null == isIsolated) {
+ isIsolated = tableConfig.shouldUseIsolatedScanners();
+ }
+
+ Boolean usesLocalIterators = split.usesLocalIterators();
+ if (null == usesLocalIterators) {
+ usesLocalIterators = tableConfig.shouldUseLocalIterators();
+ }
+
+ List<IteratorSetting> iterators = split.getIterators();
+ if (null == iterators) {
+ iterators = tableConfig.getIterators();
+ }
+
+ Collection<Pair<Text,Text>> columns = split.getFetchedColumns();
+ if (null == columns) {
+ columns = tableConfig.getFetchedColumns();
+ }
+
+ try {
+ log.debug("Creating connector with user: " + principal);
+ log.debug("Creating scanner for table: " + table);
+ log.debug("Authorizations are: " + authorizations);
+ if (isOffline) {
+ scanner = new OfflineScanner(instance, new Credentials(principal, token), split.getTableId(), authorizations);
+ } else if (instance instanceof MockInstance) {
+ scanner = instance.getConnector(principal, token).createScanner(split.getTableName(), authorizations);
+ } else {
+ scanner = new ScannerImpl(instance, new Credentials(principal, token), split.getTableId(), authorizations);
+ }
+ if (isIsolated) {
+ log.info("Creating isolated scanner");
+ scanner = new IsolatedScanner(scanner);
+ }
+ if (usesLocalIterators) {
+ log.info("Using local iterators");
+ scanner = new ClientSideIteratorScanner(scanner);
+ }
+ setupIterators(job, scanner, split.getTableName(), split);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+
+ // set up the scanner within the bounds of this split
+ for (Pair<Text,Text> c : columns) {
+ if (c.getSecond() != null) {
+ log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
+ scanner.fetchColumn(c.getFirst(), c.getSecond());
+ } else {
+ log.debug("Fetching column family " + c.getFirst());
+ scanner.fetchColumnFamily(c.getFirst());
+ }
+ }
+
+ scanner.setRange(split.getRange());
+
+ numKeysRead = 0;
+
+ // do this last after setting all scanner options
+ scannerIterator = scanner.iterator();
+ }
+
+ @Override
+ public void close() {}
+
+ @Override
+ public long getPos() throws IOException {
+ return numKeysRead;
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ if (numKeysRead > 0 && currentKey == null)
+ return 1.0f;
+ return split.getProgress(currentKey);
+ }
+
+ protected Key currentKey = null;
+
+ }
+
+ Map<String,Map<KeyExtent,List<Range>>> binOfflineTable(JobConf job, String tableId, List<Range> ranges) throws TableNotFoundException, AccumuloException,
+ AccumuloSecurityException {
+
+ Instance instance = getInstance(job);
+ Connector conn = instance.getConnector(getPrincipal(job), getAuthenticationToken(job));
+
+ return InputConfigurator.binOffline(tableId, ranges, instance, conn);
+ }
+
+ /**
+ * Read the metadata table to get tablets and match up ranges to them.
+ */
+ @Override
+ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+ Level logLevel = getLogLevel(job);
+ log.setLevel(logLevel);
+ validateOptions(job);
+
+ Random random = new Random();
+ LinkedList<InputSplit> splits = new LinkedList<InputSplit>();
+ Map<String,InputTableConfig> tableConfigs = getInputTableConfigs(job);
+ for (Map.Entry<String,InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
+ String tableName = tableConfigEntry.getKey();
+ InputTableConfig tableConfig = tableConfigEntry.getValue();
+
+ Instance instance = getInstance(job);
+ boolean mockInstance;
+ String tableId;
+ // resolve table name to id once, and use id from this point forward
+ if (instance instanceof MockInstance) {
+ tableId = "";
+ mockInstance = true;
+ } else {
+ try {
+ tableId = Tables.getTableId(instance, tableName);
+ } catch (TableNotFoundException e) {
+ throw new IOException(e);
+ }
+ mockInstance = false;
+ }
+
+ Authorizations auths = getScanAuthorizations(job);
+ String principal = getPrincipal(job);
+ AuthenticationToken token = getAuthenticationToken(job);
+
+ boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
+ List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
+ if (ranges.isEmpty()) {
+ ranges = new ArrayList<Range>(1);
+ ranges.add(new Range());
+ }
+
+ // get the metadata information for these ranges
+ Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<String,Map<KeyExtent,List<Range>>>();
+ TabletLocator tl;
+ try {
+ if (tableConfig.isOfflineScan()) {
+ binnedRanges = binOfflineTable(job, tableId, ranges);
+ while (binnedRanges == null) {
+ // Some tablets were still online, try again
+ UtilWaitThread.sleep(100 + random.nextInt(100)); // sleep randomly between 100 and 200 ms
+ binnedRanges = binOfflineTable(job, tableId, ranges);
+ }
+ } else {
+ tl = getTabletLocator(job, tableId);
+ // it's possible that the cache could contain complete, but old, information about a table's tablets... so clear it
+ tl.invalidateCache();
+ Credentials creds = new Credentials(getPrincipal(job), getAuthenticationToken(job));
+
+ while (!tl.binRanges(creds, ranges, binnedRanges).isEmpty()) {
+ if (!(instance instanceof MockInstance)) {
+ if (!Tables.exists(instance, tableId))
+ throw new TableDeletedException(tableId);
+ if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
+ throw new TableOfflineException(instance, tableId);
+ }
+ binnedRanges.clear();
+ log.warn("Unable to locate bins for specified ranges. Retrying.");
+ UtilWaitThread.sleep(100 + random.nextInt(100)); // sleep randomly between 100 and 200 ms
+ tl.invalidateCache();
+ }
+ }
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+
+ HashMap<Range,ArrayList<String>> splitsToAdd = null;
+
+ if (!autoAdjust)
+ splitsToAdd = new HashMap<Range,ArrayList<String>>();
+
+ HashMap<String,String> hostNameCache = new HashMap<String,String>();
+ for (Map.Entry<String,Map<KeyExtent,List<Range>>> tserverBin : binnedRanges.entrySet()) {
+ String ip = tserverBin.getKey().split(":", 2)[0];
+ String location = hostNameCache.get(ip);
+ if (location == null) {
+ InetAddress inetAddress = InetAddress.getByName(ip);
+ location = inetAddress.getCanonicalHostName();
+ hostNameCache.put(ip, location);
+ }
+ for (Map.Entry<KeyExtent,List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
+ Range ke = extentRanges.getKey().toDataRange();
+ for (Range r : extentRanges.getValue()) {
+ if (autoAdjust) {
+ // divide ranges into smaller ranges, based on the tablets
+ RangeInputSplit split = new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
+
+ split.setOffline(tableConfig.isOfflineScan());
+ split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
+ split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
+ split.setMockInstance(mockInstance);
+ split.setFetchedColumns(tableConfig.getFetchedColumns());
+ split.setPrincipal(principal);
+ split.setToken(token);
+ split.setInstanceName(instance.getInstanceName());
+ split.setZooKeepers(instance.getZooKeepers());
+ split.setAuths(auths);
+ split.setIterators(tableConfig.getIterators());
+ split.setLogLevel(logLevel);
+
+ splits.add(split);
+ } else {
+ // don't divide ranges
+ ArrayList<String> locations = splitsToAdd.get(r);
+ if (locations == null)
+ locations = new ArrayList<String>(1);
+ locations.add(location);
+ splitsToAdd.put(r, locations);
+ }
+ }
+ }
+ }
+
+ if (!autoAdjust)
+ for (Map.Entry<Range,ArrayList<String>> entry : splitsToAdd.entrySet()) {
+ RangeInputSplit split = new RangeInputSplit(tableName, tableId, entry.getKey(), entry.getValue().toArray(new String[0]));
+
+ split.setOffline(tableConfig.isOfflineScan());
+ split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
+ split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
+ split.setMockInstance(mockInstance);
+ split.setFetchedColumns(tableConfig.getFetchedColumns());
+ split.setPrincipal(principal);
+ split.setToken(token);
+ split.setInstanceName(instance.getInstanceName());
+ split.setZooKeepers(instance.getZooKeepers());
+ split.setAuths(auths);
+ split.setIterators(tableConfig.getIterators());
+ split.setLogLevel(logLevel);
+
+ splits.add(split);
+ }
+ }
+
+ return splits.toArray(new InputSplit[splits.size()]);
+ }
+
+}
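For orientation, a minimal sketch (not part of this commit) of the connector-info variants described in the javadoc above; the user name, password, token-file path, and mock instance name are placeholder assumptions, and the concrete AccumuloInputFormat subclass added later in this patch is used to reach the inherited static methods.

import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.hadoop.mapred.JobConf;

public class ConnectorInfoSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();

    // Variant 1: pass the token directly. As the javadoc warns, it is BASE64
    // encoded into the shared configuration and is not intended to be secure.
    AccumuloInputFormat.setConnectorInfo(job, "user", new PasswordToken("secret"));

    // Variant 2: reference a token file in HDFS instead (placeholder path);
    // the file is pulled in through the distributed cache.
    // AccumuloInputFormat.setConnectorInfo(job, "user", "/user/hadoop/accumulo.token");

    // For tests, a MockInstance can stand in for a real cluster (placeholder name).
    AccumuloInputFormat.setMockInstance(job, "test-instance");
  }
}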
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
new file mode 100644
index 0000000..8a1d6df
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.ArrayByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.file.FileOperations;
+import org.apache.accumulo.core.file.FileSKVWriter;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.commons.collections.map.LRUMap;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Progressable;
+import org.apache.log4j.Logger;
+
+/**
+ * This class allows MapReduce jobs to write output in the Accumulo data file format.<br />
+ * Care should be taken to write only sorted data (sorted by {@link Key}), as this is an important requirement of Accumulo data files.
+ *
+ * <p>
+ * The output path to be created must be specified via {@link AccumuloFileOutputFormat#setOutputPath(JobConf, Path)}. This is inherited from
+ * {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Other methods from {@link FileOutputFormat} are not supported and may be ignored or cause failures.
+ * Other Hadoop configuration options that affect the behavior of the underlying files directly in the Job's configuration may work, but they are not directly
+ * supported at this time.
+ */
+public class AccumuloFileOutputFormat extends FileOutputFormat<Key,Value> {
+
+ private static final Class<?> CLASS = AccumuloFileOutputFormat.class;
+ protected static final Logger log = Logger.getLogger(CLASS);
+
+ /**
+ * This helper method provides an AccumuloConfiguration object constructed from the Accumulo defaults, and overridden with Accumulo properties that have been
+ * stored in the Job's configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @since 1.5.0
+ */
+ protected static AccumuloConfiguration getAccumuloConfiguration(JobConf job) {
+ return FileOutputConfigurator.getAccumuloConfiguration(CLASS, job);
+ }
+
+ /**
+ * Sets the compression type to use for data blocks. Specifying a compression type may require additional libraries to be available to your Job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param compressionType
+ * one of "none", "gz", "lzo", or "snappy"
+ * @since 1.5.0
+ */
+ public static void setCompressionType(JobConf job, String compressionType) {
+ FileOutputConfigurator.setCompressionType(CLASS, job, compressionType);
+ }
+
+ /**
+ * Sets the size for data blocks within each file.<br />
+ * Data blocks are a span of key/value pairs stored in the file that are compressed and indexed as a group.
+ *
+ * <p>
+ * Making this value smaller may increase seek performance, but at the cost of increasing the size of the indexes (which can also affect seek performance).
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param dataBlockSize
+ * the block size, in bytes
+ * @since 1.5.0
+ */
+ public static void setDataBlockSize(JobConf job, long dataBlockSize) {
+ FileOutputConfigurator.setDataBlockSize(CLASS, job, dataBlockSize);
+ }
+
+ /**
+ * Sets the size for file blocks in the file system; file blocks are managed, and replicated, by the underlying file system.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param fileBlockSize
+ * the block size, in bytes
+ * @since 1.5.0
+ */
+ public static void setFileBlockSize(JobConf job, long fileBlockSize) {
+ FileOutputConfigurator.setFileBlockSize(CLASS, job, fileBlockSize);
+ }
+
+ /**
+ * Sets the size for index blocks within each file; smaller blocks mean a deeper index hierarchy within the file, while larger blocks mean a shallower
+ * index hierarchy within the file. This can affect the performance of queries.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param indexBlockSize
+ * the block size, in bytes
+ * @since 1.5.0
+ */
+ public static void setIndexBlockSize(JobConf job, long indexBlockSize) {
+ FileOutputConfigurator.setIndexBlockSize(CLASS, job, indexBlockSize);
+ }
+
+ /**
+ * Sets the file system replication factor for the resulting file, overriding the file system default.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param replication
+ * the number of replicas for produced files
+ * @since 1.5.0
+ */
+ public static void setReplication(JobConf job, int replication) {
+ FileOutputConfigurator.setReplication(CLASS, job, replication);
+ }
+
+ @Override
+ public RecordWriter<Key,Value> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
+ // get the path of the temporary output file
+ final Configuration conf = job;
+ final AccumuloConfiguration acuConf = getAccumuloConfiguration(job);
+
+ final String extension = acuConf.get(Property.TABLE_FILE_TYPE);
+ final Path file = new Path(getWorkOutputPath(job), getUniqueName(job, "part") + "." + extension);
+
+ final LRUMap validVisibilities = new LRUMap(1000);
+
+ return new RecordWriter<Key,Value>() {
+ FileSKVWriter out = null;
+
+ @Override
+ public void close(Reporter reporter) throws IOException {
+ if (out != null)
+ out.close();
+ }
+
+ @Override
+ public void write(Key key, Value value) throws IOException {
+
+ Boolean wasChecked = (Boolean) validVisibilities.get(key.getColumnVisibilityData());
+ if (wasChecked == null) {
+ byte[] cv = key.getColumnVisibilityData().toArray();
+ new ColumnVisibility(cv);
+ validVisibilities.put(new ArrayByteSequence(Arrays.copyOf(cv, cv.length)), Boolean.TRUE);
+ }
+
+ if (out == null) {
+ out = FileOperations.getInstance().openWriter(file.toString(), file.getFileSystem(conf), conf, acuConf);
+ out.startDefaultLocalityGroup();
+ }
+ out.append(key, value);
+ }
+ };
+ }
+
+}
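As a hedged illustration of the configurator methods above (again, not part of the commit), a driver might tune the produced data files as sketched below; the output path and block sizes are placeholders, not recommendations.

import org.apache.accumulo.core.client.mapred.AccumuloFileOutputFormat;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class FileOutputConfigSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    job.setOutputFormat(AccumuloFileOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);

    // Required: the directory the sorted data files are written to (placeholder).
    AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp/bulk-output"));

    // Optional tuning; the values below are illustrative only.
    AccumuloFileOutputFormat.setCompressionType(job, "gz");
    AccumuloFileOutputFormat.setDataBlockSize(job, 256 * 1024);
    AccumuloFileOutputFormat.setIndexBlockSize(job, 128 * 1024);
    AccumuloFileOutputFormat.setReplication(job, 3);
  }
}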
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
new file mode 100644
index 0000000..18e286a
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import java.io.IOException;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.format.DefaultFormatter;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.log4j.Level;
+
+/**
+ * This class allows MapReduce jobs to use Accumulo as the source of data. This {@link InputFormat} provides keys and values of type {@link Key} and
+ * {@link Value} to the Map function.
+ *
+ * The user must specify the following via static configurator methods:
+ *
+ * <ul>
+ * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, AuthenticationToken)}
+ * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, String)}
+ * <li>{@link AccumuloInputFormat#setScanAuthorizations(JobConf, Authorizations)}
+ * <li>{@link AccumuloInputFormat#setZooKeeperInstance(JobConf, ClientConfiguration)} OR
+ * {@link AccumuloInputFormat#setMockInstance(JobConf, String)}
+ * </ul>
+ *
+ * Other static methods are optional.
+ */
+public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
+
+ @Override
+ public RecordReader<Key,Value> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
+ log.setLevel(getLogLevel(job));
+
+ // Override the log level from the configuration: if the RangeInputSplit carries its own level, that is the more correct one to use.
+ if (split instanceof org.apache.accumulo.core.client.mapreduce.RangeInputSplit) {
+ org.apache.accumulo.core.client.mapreduce.RangeInputSplit risplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) split;
+ Level level = risplit.getLogLevel();
+ if (null != level) {
+ log.setLevel(level);
+ }
+ }
+
+ RecordReaderBase<Key,Value> recordReader = new RecordReaderBase<Key,Value>() {
+
+ @Override
+ public boolean next(Key key, Value value) throws IOException {
+ if (scannerIterator.hasNext()) {
+ ++numKeysRead;
+ Entry<Key,Value> entry = scannerIterator.next();
+ key.set(currentKey = entry.getKey());
+ value.set(entry.getValue().get());
+ if (log.isTraceEnabled())
+ log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public Key createKey() {
+ return new Key();
+ }
+
+ @Override
+ public Value createValue() {
+ return new Value();
+ }
+
+ };
+ recordReader.initialize(split, job);
+ return recordReader;
+ }
+}
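A minimal sketch of a mapred driver wired to this input format, assuming a live ZooKeeper quorum; the instance name, hosts, credentials, and table are placeholders, and setInputTableName is assumed to be inherited from InputFormatBase, which is not shown in this excerpt.

import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.mapred.JobConf;

public class InputDriverSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    job.setInputFormat(AccumuloInputFormat.class);

    // The required configurator calls from the class javadoc above;
    // user, password, instance name, and hosts are placeholders.
    AccumuloInputFormat.setConnectorInfo(job, "user", new PasswordToken("secret"));
    AccumuloInputFormat.setZooKeeperInstance(job,
        ClientConfiguration.loadDefault().withInstance("instance").withZkHosts("zoo1:2181"));
    AccumuloInputFormat.setScanAuthorizations(job, new Authorizations());

    // Assumed to come from InputFormatBase (not part of this excerpt).
    AccumuloInputFormat.setInputTableName(job, "mytable");
  }
}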
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
new file mode 100644
index 0000000..bbafef5
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.format.DefaultFormatter;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * This class allows MapReduce jobs to use multiple Accumulo tables as the source of data. This {@link org.apache.hadoop.mapred.InputFormat} provides keys and
+ * values of type {@link Key} and {@link Value} to the Map function.
+ *
+ * The user must specify the following via static configurator methods:
+ *
+ * <ul>
+ * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, org.apache.accumulo.core.client.security.tokens.AuthenticationToken)}
+ * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, String)}
+ * <li>{@link AccumuloInputFormat#setScanAuthorizations(JobConf, org.apache.accumulo.core.security.Authorizations)}
+ * <li>{@link AccumuloInputFormat#setZooKeeperInstance(JobConf, ClientConfiguration)} OR {@link AccumuloInputFormat#setMockInstance(JobConf, String)}
+ * <li>{@link AccumuloMultiTableInputFormat#setInputTableConfigs(org.apache.hadoop.mapred.JobConf, java.util.Map)}
+ * </ul>
+ *
+ * Other static methods are optional.
+ */
+
+public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value> {
+
+ /**
+ * Sets the {@link InputTableConfig} objects on the given Hadoop configuration
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param configs
+ * the table query configs to be set on the configuration.
+ * @since 1.6.0
+ */
+ public static void setInputTableConfigs(JobConf job, Map<String,InputTableConfig> configs) {
+ InputConfigurator.setInputTableConfigs(CLASS, job, configs);
+ }
+
+ @Override
+ public RecordReader<Key,Value> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
+ log.setLevel(getLogLevel(job));
+ InputFormatBase.RecordReaderBase<Key,Value> recordReader = new InputFormatBase.RecordReaderBase<Key,Value>() {
+
+ @Override
+ public boolean next(Key key, Value value) throws IOException {
+ if (scannerIterator.hasNext()) {
+ ++numKeysRead;
+ Map.Entry<Key,Value> entry = scannerIterator.next();
+ key.set(currentKey = entry.getKey());
+ value.set(entry.getValue().get());
+ if (log.isTraceEnabled())
+ log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public Key createKey() {
+ return new Key();
+ }
+
+ @Override
+ public Value createValue() {
+ return new Value();
+ }
+
+ };
+ recordReader.initialize(split, job);
+ return recordReader;
+ }
+}
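A hedged sketch of multi-table configuration, assuming InputTableConfig exposes a default constructor plus setRanges and setUseIsolatedScanners setters; the table names and row are placeholders.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.accumulo.core.client.mapred.AccumuloMultiTableInputFormat;
import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.mapred.JobConf;

public class MultiTableSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    Map<String,InputTableConfig> configs = new HashMap<String,InputTableConfig>();

    // Scan all of "table1".
    InputTableConfig first = new InputTableConfig();
    first.setRanges(Collections.singletonList(new Range()));
    configs.put("table1", first);

    // Restrict "table2" to a single row and request isolated scanners.
    InputTableConfig second = new InputTableConfig();
    second.setRanges(Collections.singletonList(Range.exact("row1")));
    second.setUseIsolatedScanners(true);
    configs.put("table2", second);

    AccumuloMultiTableInputFormat.setInputTableConfigs(job, configs);
    // Connector info, instance, and scan authorizations still need to be set,
    // as in the other input format sketches above.
  }
}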
[12/12] git commit: ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
ACCUMULO-1880 create mapreduce module
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/4dfcb9de
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/4dfcb9de
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/4dfcb9de
Branch: refs/heads/master
Commit: 4dfcb9dec00b21b4cb8c3219436b34a7e31f9958
Parents: f74c5c6
Author: Mike Drob <md...@cloudera.com>
Authored: Mon Apr 21 15:24:22 2014 -0400
Committer: Mike Drob <md...@cloudera.com>
Committed: Mon Apr 21 17:18:10 2014 -0400
----------------------------------------------------------------------
assemble/pom.xml | 4 +
.../core/cli/MapReduceClientOnDefaultTable.java | 49 --
.../cli/MapReduceClientOnRequiredTable.java | 53 --
.../accumulo/core/cli/MapReduceClientOpts.java | 32 -
.../core/client/mapred/AbstractInputFormat.java | 607 --------------
.../client/mapred/AccumuloFileOutputFormat.java | 178 -----
.../core/client/mapred/AccumuloInputFormat.java | 96 ---
.../mapred/AccumuloMultiTableInputFormat.java | 98 ---
.../client/mapred/AccumuloOutputFormat.java | 539 -------------
.../client/mapred/AccumuloRowInputFormat.java | 87 --
.../core/client/mapred/InputFormatBase.java | 383 ---------
.../core/client/mapred/RangeInputSplit.java | 40 -
.../client/mapreduce/AbstractInputFormat.java | 672 ----------------
.../mapreduce/AccumuloFileOutputFormat.java | 177 -----
.../client/mapreduce/AccumuloInputFormat.java | 81 --
.../AccumuloMultiTableInputFormat.java | 101 ---
.../client/mapreduce/AccumuloOutputFormat.java | 545 -------------
.../mapreduce/AccumuloRowInputFormat.java | 79 --
.../core/client/mapreduce/InputFormatBase.java | 384 ---------
.../core/client/mapreduce/InputTableConfig.java | 367 ---------
.../core/client/mapreduce/RangeInputSplit.java | 490 ------------
.../mapreduce/lib/impl/ConfiguratorBase.java | 369 ---------
.../lib/impl/DistributedCacheHelper.java | 52 --
.../lib/impl/FileOutputConfigurator.java | 187 -----
.../mapreduce/lib/impl/InputConfigurator.java | 796 -------------------
.../mapreduce/lib/impl/OutputConfigurator.java | 204 -----
.../client/mapreduce/lib/impl/package-info.java | 34 -
.../lib/partition/KeyRangePartitioner.java | 60 --
.../lib/partition/RangePartitioner.java | 135 ----
.../mapred/AccumuloFileOutputFormatTest.java | 247 ------
.../client/mapred/AccumuloInputFormatTest.java | 285 -------
.../AccumuloMultiTableInputFormatTest.java | 188 -----
.../client/mapred/AccumuloOutputFormatTest.java | 203 -----
.../mapred/AccumuloRowInputFormatTest.java | 206 -----
.../core/client/mapred/TokenFileTest.java | 177 -----
.../mapreduce/AccumuloFileOutputFormatTest.java | 239 ------
.../mapreduce/AccumuloInputFormatTest.java | 412 ----------
.../AccumuloMultiTableInputFormatTest.java | 185 -----
.../mapreduce/AccumuloOutputFormatTest.java | 198 -----
.../mapreduce/AccumuloRowInputFormatTest.java | 202 -----
.../BadPasswordSplitsAccumuloInputFormat.java | 42 -
.../EmptySplitsAccumuloInputFormat.java | 45 --
.../client/mapreduce/InputTableConfigTest.java | 107 ---
.../client/mapreduce/RangeInputSplitTest.java | 107 ---
.../core/client/mapreduce/TokenFileTest.java | 171 ----
.../lib/impl/ConfiguratorBaseTest.java | 129 ---
.../lib/partition/RangePartitionerTest.java | 82 --
examples/simple/pom.xml | 4 +
mapreduce/pom.xml | 67 ++
.../core/cli/MapReduceClientOnDefaultTable.java | 49 ++
.../cli/MapReduceClientOnRequiredTable.java | 53 ++
.../accumulo/core/cli/MapReduceClientOpts.java | 32 +
.../core/client/mapred/AbstractInputFormat.java | 607 ++++++++++++++
.../client/mapred/AccumuloFileOutputFormat.java | 178 +++++
.../core/client/mapred/AccumuloInputFormat.java | 96 +++
.../mapred/AccumuloMultiTableInputFormat.java | 98 +++
.../client/mapred/AccumuloOutputFormat.java | 539 +++++++++++++
.../client/mapred/AccumuloRowInputFormat.java | 87 ++
.../core/client/mapred/InputFormatBase.java | 383 +++++++++
.../core/client/mapred/RangeInputSplit.java | 40 +
.../client/mapreduce/AbstractInputFormat.java | 672 ++++++++++++++++
.../mapreduce/AccumuloFileOutputFormat.java | 177 +++++
.../client/mapreduce/AccumuloInputFormat.java | 81 ++
.../AccumuloMultiTableInputFormat.java | 101 +++
.../client/mapreduce/AccumuloOutputFormat.java | 545 +++++++++++++
.../mapreduce/AccumuloRowInputFormat.java | 79 ++
.../core/client/mapreduce/InputFormatBase.java | 384 +++++++++
.../core/client/mapreduce/InputTableConfig.java | 367 +++++++++
.../core/client/mapreduce/RangeInputSplit.java | 490 ++++++++++++
.../mapreduce/lib/impl/ConfiguratorBase.java | 369 +++++++++
.../lib/impl/DistributedCacheHelper.java | 52 ++
.../lib/impl/FileOutputConfigurator.java | 187 +++++
.../mapreduce/lib/impl/InputConfigurator.java | 796 +++++++++++++++++++
.../mapreduce/lib/impl/OutputConfigurator.java | 204 +++++
.../client/mapreduce/lib/impl/package-info.java | 34 +
.../lib/partition/KeyRangePartitioner.java | 60 ++
.../lib/partition/RangePartitioner.java | 135 ++++
mapreduce/src/main/resources/.gitignore | 0
.../mapred/AccumuloFileOutputFormatTest.java | 247 ++++++
.../client/mapred/AccumuloInputFormatTest.java | 285 +++++++
.../AccumuloMultiTableInputFormatTest.java | 188 +++++
.../client/mapred/AccumuloOutputFormatTest.java | 203 +++++
.../mapred/AccumuloRowInputFormatTest.java | 206 +++++
.../core/client/mapred/TokenFileTest.java | 177 +++++
.../mapreduce/AccumuloFileOutputFormatTest.java | 239 ++++++
.../mapreduce/AccumuloInputFormatTest.java | 412 ++++++++++
.../AccumuloMultiTableInputFormatTest.java | 185 +++++
.../mapreduce/AccumuloOutputFormatTest.java | 198 +++++
.../mapreduce/AccumuloRowInputFormatTest.java | 202 +++++
.../BadPasswordSplitsAccumuloInputFormat.java | 42 +
.../EmptySplitsAccumuloInputFormat.java | 45 ++
.../client/mapreduce/InputTableConfigTest.java | 107 +++
.../client/mapreduce/RangeInputSplitTest.java | 107 +++
.../core/client/mapreduce/TokenFileTest.java | 171 ++++
.../lib/impl/ConfiguratorBaseTest.java | 129 +++
.../lib/partition/RangePartitionerTest.java | 82 ++
mapreduce/src/test/resources/log4j.properties | 28 +
pom.xml | 6 +
98 files changed, 10229 insertions(+), 10120 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/assemble/pom.xml
----------------------------------------------------------------------
diff --git a/assemble/pom.xml b/assemble/pom.xml
index 378273a..d30c265 100644
--- a/assemble/pom.xml
+++ b/assemble/pom.xml
@@ -65,6 +65,10 @@
</dependency>
<dependency>
<groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-mapreduce</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-master</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java b/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java
deleted file mode 100644
index e7a3dd4..0000000
--- a/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnDefaultTable.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.cli;
-
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.hadoop.mapreduce.Job;
-
-import com.beust.jcommander.Parameter;
-
-public class MapReduceClientOnDefaultTable extends MapReduceClientOpts {
- @Parameter(names = "--table", description = "table to use")
- public String tableName;
-
- public MapReduceClientOnDefaultTable(String table) {
- this.tableName = table;
- }
-
- public String getTableName() {
- return tableName;
- }
-
- @Override
- public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
- super.setAccumuloConfigs(job);
- AccumuloInputFormat.setConnectorInfo(job, principal, getToken());
- AccumuloInputFormat.setInputTableName(job, getTableName());
- AccumuloInputFormat.setScanAuthorizations(job, auths);
- AccumuloOutputFormat.setConnectorInfo(job, principal, getToken());
- AccumuloOutputFormat.setCreateTables(job, true);
- AccumuloOutputFormat.setDefaultTableName(job, getTableName());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java b/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java
deleted file mode 100644
index abfc17d..0000000
--- a/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOnRequiredTable.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.cli;
-
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.hadoop.mapreduce.Job;
-
-import com.beust.jcommander.Parameter;
-
-public class MapReduceClientOnRequiredTable extends MapReduceClientOpts {
- @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
- private String tableName;
-
- @Parameter(names = {"-tf", "--tokenFile"}, description = "File in hdfs containing the user's authentication token create with \"bin/accumulo create-token\"")
- private String tokenFile = "";
-
- @Override
- public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
- super.setAccumuloConfigs(job);
-
- if (tokenFile.isEmpty()) {
- AccumuloInputFormat.setConnectorInfo(job, principal, getToken());
- AccumuloOutputFormat.setConnectorInfo(job, principal, getToken());
- } else {
- AccumuloInputFormat.setConnectorInfo(job, principal, tokenFile);
- AccumuloOutputFormat.setConnectorInfo(job, principal, tokenFile);
- }
- AccumuloInputFormat.setInputTableName(job, getTableName());
- AccumuloInputFormat.setScanAuthorizations(job, auths);
- AccumuloOutputFormat.setCreateTables(job, true);
- AccumuloOutputFormat.setDefaultTableName(job, getTableName());
- }
-
- public String getTableName() {
- return tableName;
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java b/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java
deleted file mode 100644
index 4b3b7ed..0000000
--- a/core/src/main/java/org/apache/accumulo/core/cli/MapReduceClientOpts.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.cli;
-
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.hadoop.mapreduce.Job;
-
-/**
- * Adds some MR awareness to the ClientOpts
- */
-public class MapReduceClientOpts extends ClientOpts {
- public void setAccumuloConfigs(Job job) throws AccumuloSecurityException {
- AccumuloInputFormat.setZooKeeperInstance(job, this.getClientConfiguration());
- AccumuloOutputFormat.setZooKeeperInstance(job, this.getClientConfiguration());
- }
-}
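For context on how these relocated opts classes are typically consumed (a sketch under assumptions, not code from this commit): an example tool parses its arguments and then lets setAccumuloConfigs wire the job. The parseArgs helper is assumed from the ClientOpts/Help hierarchy, and Job.getInstance assumes the Hadoop 2 API.

import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class OptsUsageSketch {
  public static void main(String[] args) throws Exception {
    // Expects at least "-t <table>" plus the usual connection options on the command line.
    MapReduceClientOnRequiredTable opts = new MapReduceClientOnRequiredTable();
    // parseArgs is assumed from the ClientOpts/Help hierarchy, which is not part of this diff.
    opts.parseArgs(OptsUsageSketch.class.getName(), args);

    Job job = Job.getInstance(new Configuration());
    job.setJobName(OptsUsageSketch.class.getSimpleName());
    // Wires the ZooKeeper instance, connector info, table name, and scan
    // authorizations onto AccumuloInputFormat/AccumuloOutputFormat as shown above.
    opts.setAccumuloConfigs(job);
  }
}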
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
deleted file mode 100644
index 5af78d2..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
+++ /dev/null
@@ -1,607 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import java.io.IOException;
-import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.ClientSideIteratorScanner;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Instance;
-import org.apache.accumulo.core.client.IsolatedScanner;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.TableDeletedException;
-import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.client.TableOfflineException;
-import org.apache.accumulo.core.client.impl.OfflineScanner;
-import org.apache.accumulo.core.client.impl.ScannerImpl;
-import org.apache.accumulo.core.client.impl.Tables;
-import org.apache.accumulo.core.client.impl.TabletLocator;
-import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.KeyExtent;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.master.state.tables.TableState;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.security.Credentials;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.accumulo.core.util.UtilWaitThread;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-
-/**
- * An abstract input format to provide shared methods common to all other input format classes. At the very least, any classes inheriting from this class will
- * need to define their own {@link RecordReader}.
- */
-public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
- protected static final Class<?> CLASS = AccumuloInputFormat.class;
- protected static final Logger log = Logger.getLogger(CLASS);
-
- /**
- * Sets the connector information needed to communicate with Accumulo in this job.
- *
- * <p>
- * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe
- * conversion to a string, and is not intended to be secure.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param principal
- * a valid Accumulo user name (user must have Table.CREATE permission)
- * @param token
- * the user's password
- * @since 1.5.0
- */
- public static void setConnectorInfo(JobConf job, String principal, AuthenticationToken token) throws AccumuloSecurityException {
- InputConfigurator.setConnectorInfo(CLASS, job, principal, token);
- }
-
- /**
- * Sets the connector information needed to communicate with Accumulo in this job.
- *
- * <p>
- * Stores the password in a file in HDFS and pulls that into the Distributed Cache in an attempt to be more secure than storing it in the Configuration.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param principal
- * a valid Accumulo user name (user must have Table.CREATE permission)
- * @param tokenFile
- * the path to the token file
- * @since 1.6.0
- */
- public static void setConnectorInfo(JobConf job, String principal, String tokenFile) throws AccumuloSecurityException {
- InputConfigurator.setConnectorInfo(CLASS, job, principal, tokenFile);
- }
-
- /**
- * Determines if the connector has been configured.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return true if the connector has been configured, false otherwise
- * @since 1.5.0
- * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
- */
- protected static Boolean isConnectorInfoSet(JobConf job) {
- return InputConfigurator.isConnectorInfoSet(CLASS, job);
- }
-
- /**
- * Gets the user name from the configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return the user name
- * @since 1.5.0
- * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
- */
- protected static String getPrincipal(JobConf job) {
- return InputConfigurator.getPrincipal(CLASS, job);
- }
-
- /**
- * Gets the authenticated token from either the specified token file or directly from the configuration, whichever was used when the job was configured.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return the principal's authentication token
- * @since 1.6.0
- * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
- * @see #setConnectorInfo(JobConf, String, String)
- */
- protected static AuthenticationToken getAuthenticationToken(JobConf job) {
- return InputConfigurator.getAuthenticationToken(CLASS, job);
- }
-
- /**
- * Configures a {@link org.apache.accumulo.core.client.ZooKeeperInstance} for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param clientConfig
- * client configuration containing connection options
- * @since 1.6.0
- */
- public static void setZooKeeperInstance(JobConf job, ClientConfiguration clientConfig) {
- InputConfigurator.setZooKeeperInstance(CLASS, job, clientConfig);
- }
-
- /**
- * Configures a {@link org.apache.accumulo.core.client.mock.MockInstance} for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param instanceName
- * the Accumulo instance name
- * @since 1.5.0
- */
- public static void setMockInstance(JobConf job, String instanceName) {
- InputConfigurator.setMockInstance(CLASS, job, instanceName);
- }
-
- /**
- * Initializes an Accumulo {@link org.apache.accumulo.core.client.Instance} based on the configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return an Accumulo instance
- * @since 1.5.0
- * @see #setZooKeeperInstance(JobConf, ClientConfiguration)
- * @see #setMockInstance(JobConf, String)
- */
- protected static Instance getInstance(JobConf job) {
- return InputConfigurator.getInstance(CLASS, job);
- }
-
- /**
- * Sets the log level for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param level
- * the logging level
- * @since 1.5.0
- */
- public static void setLogLevel(JobConf job, Level level) {
- InputConfigurator.setLogLevel(CLASS, job, level);
- }
-
- /**
- * Gets the log level from this configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return the log level
- * @since 1.5.0
- * @see #setLogLevel(JobConf, Level)
- */
- protected static Level getLogLevel(JobConf job) {
- return InputConfigurator.getLogLevel(CLASS, job);
- }
-
- /**
- * Sets the {@link org.apache.accumulo.core.security.Authorizations} used to scan. Must be a subset of the user's authorization. Defaults to the empty set.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param auths
- * the user's authorizations
- * @since 1.5.0
- */
- public static void setScanAuthorizations(JobConf job, Authorizations auths) {
- InputConfigurator.setScanAuthorizations(CLASS, job, auths);
- }
-
- /**
- * Gets the authorizations to set for the scans from the configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return the Accumulo scan authorizations
- * @since 1.5.0
- * @see #setScanAuthorizations(JobConf, Authorizations)
- */
- protected static Authorizations getScanAuthorizations(JobConf job) {
- return InputConfigurator.getScanAuthorizations(CLASS, job);
- }
-
- /**
- * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return an Accumulo tablet locator
- * @throws org.apache.accumulo.core.client.TableNotFoundException
- * if the table name set on the configuration doesn't exist
- * @since 1.6.0
- */
- protected static TabletLocator getTabletLocator(JobConf job, String tableId) throws TableNotFoundException {
- return InputConfigurator.getTabletLocator(CLASS, job, tableId);
- }
-
- // InputFormat doesn't have the equivalent of OutputFormat's checkOutputSpecs(JobContext job)
- /**
- * Check whether a configuration is fully configured to be used with an Accumulo {@link InputFormat}.
- *
- * @param job
- * the Hadoop context for the configured job
- * @throws java.io.IOException
- * if the context is improperly configured
- * @since 1.5.0
- */
- protected static void validateOptions(JobConf job) throws IOException {
- InputConfigurator.validateOptions(CLASS, job);
- }
-
- /**
- * Fetches all {@link InputTableConfig}s that have been set on the given Hadoop job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @return the {@link InputTableConfig} objects set on the job
- * @since 1.6.0
- */
- public static Map<String,InputTableConfig> getInputTableConfigs(JobConf job) {
- return InputConfigurator.getInputTableConfigs(CLASS, job);
- }
-
- /**
- * Fetches a {@link InputTableConfig} that has been set on the configuration for a specific table.
- *
- * <p>
- * null is returned in the event that the table doesn't exist.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param tableName
- * the table name for which to grab the config object
- * @return the {@link InputTableConfig} for the given table
- * @since 1.6.0
- */
- public static InputTableConfig getInputTableConfig(JobConf job, String tableName) {
- return InputConfigurator.getInputTableConfig(CLASS, job, tableName);
- }
-
- /**
- * An abstract base class to be used to create {@link org.apache.hadoop.mapred.RecordReader} instances that convert from Accumulo
- * {@link org.apache.accumulo.core.data.Key}/{@link org.apache.accumulo.core.data.Value} pairs to the user's K/V types.
- *
- * Subclasses must implement {@link #next(Object, Object)} to update key and value, and also to update the following variables:
- * <ul>
- * <li>Key {@link #currentKey} (used for progress reporting)</li>
- * <li>long {@link #numKeysRead} (used for progress reporting)</li>
- * </ul>
- */
- protected abstract static class AbstractRecordReader<K,V> implements RecordReader<K,V> {
- protected long numKeysRead;
- protected Iterator<Map.Entry<Key,Value>> scannerIterator;
- protected RangeInputSplit split;
-
- /**
- * Configures the iterators on a scanner for the given table name.
- *
- * @param job
- * the Hadoop job configuration
- * @param scanner
- * the scanner for which to configure the iterators
- * @param tableName
- * the table name for which the scanner is configured
- * @since 1.6.0
- */
- protected abstract void setupIterators(JobConf job, Scanner scanner, String tableName, RangeInputSplit split);
-
- /**
- * Initialize a scanner over the given input split using this task attempt configuration.
- */
- public void initialize(InputSplit inSplit, JobConf job) throws IOException {
- Scanner scanner;
- split = (RangeInputSplit) inSplit;
- log.debug("Initializing input split: " + split.getRange());
-
- Instance instance = split.getInstance();
- if (null == instance) {
- instance = getInstance(job);
- }
-
- String principal = split.getPrincipal();
- if (null == principal) {
- principal = getPrincipal(job);
- }
-
- AuthenticationToken token = split.getToken();
- if (null == token) {
- token = getAuthenticationToken(job);
- }
-
- Authorizations authorizations = split.getAuths();
- if (null == authorizations) {
- authorizations = getScanAuthorizations(job);
- }
-
- String table = split.getTableName();
-
- // in case the table name changed, we can still use the previous name for purposes of configuration,
- // but the scanner will use the table id resolved at job setup time
- InputTableConfig tableConfig = getInputTableConfig(job, split.getTableName());
-
- Boolean isOffline = split.isOffline();
- if (null == isOffline) {
- isOffline = tableConfig.isOfflineScan();
- }
-
- Boolean isIsolated = split.isIsolatedScan();
- if (null == isIsolated) {
- isIsolated = tableConfig.shouldUseIsolatedScanners();
- }
-
- Boolean usesLocalIterators = split.usesLocalIterators();
- if (null == usesLocalIterators) {
- usesLocalIterators = tableConfig.shouldUseLocalIterators();
- }
-
- List<IteratorSetting> iterators = split.getIterators();
- if (null == iterators) {
- iterators = tableConfig.getIterators();
- }
-
- Collection<Pair<Text,Text>> columns = split.getFetchedColumns();
- if (null == columns) {
- columns = tableConfig.getFetchedColumns();
- }
-
- try {
- log.debug("Creating connector with user: " + principal);
- log.debug("Creating scanner for table: " + table);
- log.debug("Authorizations are: " + authorizations);
- if (isOffline) {
- scanner = new OfflineScanner(instance, new Credentials(principal, token), split.getTableId(), authorizations);
- } else if (instance instanceof MockInstance) {
- scanner = instance.getConnector(principal, token).createScanner(split.getTableName(), authorizations);
- } else {
- scanner = new ScannerImpl(instance, new Credentials(principal, token), split.getTableId(), authorizations);
- }
- if (isIsolated) {
- log.info("Creating isolated scanner");
- scanner = new IsolatedScanner(scanner);
- }
- if (usesLocalIterators) {
- log.info("Using local iterators");
- scanner = new ClientSideIteratorScanner(scanner);
- }
- setupIterators(job, scanner, split.getTableName(), split);
- } catch (Exception e) {
- throw new IOException(e);
- }
-
- // setup a scanner within the bounds of this split
- for (Pair<Text,Text> c : columns) {
- if (c.getSecond() != null) {
- log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
- scanner.fetchColumn(c.getFirst(), c.getSecond());
- } else {
- log.debug("Fetching column family " + c.getFirst());
- scanner.fetchColumnFamily(c.getFirst());
- }
- }
-
- scanner.setRange(split.getRange());
-
- numKeysRead = 0;
-
- // do this last after setting all scanner options
- scannerIterator = scanner.iterator();
- }
-
- @Override
- public void close() {}
-
- @Override
- public long getPos() throws IOException {
- return numKeysRead;
- }
-
- @Override
- public float getProgress() throws IOException {
- if (numKeysRead > 0 && currentKey == null)
- return 1.0f;
- return split.getProgress(currentKey);
- }
-
- protected Key currentKey = null;
-
- }
-
- Map<String,Map<KeyExtent,List<Range>>> binOfflineTable(JobConf job, String tableId, List<Range> ranges) throws TableNotFoundException, AccumuloException,
- AccumuloSecurityException {
-
- Instance instance = getInstance(job);
- Connector conn = instance.getConnector(getPrincipal(job), getAuthenticationToken(job));
-
- return InputConfigurator.binOffline(tableId, ranges, instance, conn);
- }
-
- /**
- * Read the metadata table to get tablets and match up ranges to them.
- */
- @Override
- public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
- Level logLevel = getLogLevel(job);
- log.setLevel(logLevel);
- validateOptions(job);
-
- Random random = new Random();
- LinkedList<InputSplit> splits = new LinkedList<InputSplit>();
- Map<String,InputTableConfig> tableConfigs = getInputTableConfigs(job);
- for (Map.Entry<String,InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
- String tableName = tableConfigEntry.getKey();
- InputTableConfig tableConfig = tableConfigEntry.getValue();
-
- Instance instance = getInstance(job);
- boolean mockInstance;
- String tableId;
- // resolve table name to id once, and use id from this point forward
- if (instance instanceof MockInstance) {
- tableId = "";
- mockInstance = true;
- } else {
- try {
- tableId = Tables.getTableId(instance, tableName);
- } catch (TableNotFoundException e) {
- throw new IOException(e);
- }
- mockInstance = false;
- }
-
- Authorizations auths = getScanAuthorizations(job);
- String principal = getPrincipal(job);
- AuthenticationToken token = getAuthenticationToken(job);
-
- boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
- List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
- if (ranges.isEmpty()) {
- ranges = new ArrayList<Range>(1);
- ranges.add(new Range());
- }
-
- // get the metadata information for these ranges
- Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<String,Map<KeyExtent,List<Range>>>();
- TabletLocator tl;
- try {
- if (tableConfig.isOfflineScan()) {
- binnedRanges = binOfflineTable(job, tableId, ranges);
- while (binnedRanges == null) {
- // Some tablets were still online, try again
- UtilWaitThread.sleep(100 + random.nextInt(100)); // sleep randomly between 100 and 200 ms
- binnedRanges = binOfflineTable(job, tableId, ranges);
- }
- } else {
- tl = getTabletLocator(job, tableId);
- // it's possible that the cache could contain complete but old information about a table's tablets... so clear it
- tl.invalidateCache();
- Credentials creds = new Credentials(getPrincipal(job), getAuthenticationToken(job));
-
- while (!tl.binRanges(creds, ranges, binnedRanges).isEmpty()) {
- if (!(instance instanceof MockInstance)) {
- if (!Tables.exists(instance, tableId))
- throw new TableDeletedException(tableId);
- if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
- throw new TableOfflineException(instance, tableId);
- }
- binnedRanges.clear();
- log.warn("Unable to locate bins for specified ranges. Retrying.");
- UtilWaitThread.sleep(100 + random.nextInt(100)); // sleep randomly between 100 and 200 ms
- tl.invalidateCache();
- }
- }
- } catch (Exception e) {
- throw new IOException(e);
- }
-
- HashMap<Range,ArrayList<String>> splitsToAdd = null;
-
- if (!autoAdjust)
- splitsToAdd = new HashMap<Range,ArrayList<String>>();
-
- HashMap<String,String> hostNameCache = new HashMap<String,String>();
- for (Map.Entry<String,Map<KeyExtent,List<Range>>> tserverBin : binnedRanges.entrySet()) {
- String ip = tserverBin.getKey().split(":", 2)[0];
- String location = hostNameCache.get(ip);
- if (location == null) {
- InetAddress inetAddress = InetAddress.getByName(ip);
- location = inetAddress.getCanonicalHostName();
- hostNameCache.put(ip, location);
- }
- for (Map.Entry<KeyExtent,List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
- Range ke = extentRanges.getKey().toDataRange();
- for (Range r : extentRanges.getValue()) {
- if (autoAdjust) {
- // divide ranges into smaller ranges, based on the tablets
- RangeInputSplit split = new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
-
- split.setOffline(tableConfig.isOfflineScan());
- split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
- split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
- split.setMockInstance(mockInstance);
- split.setFetchedColumns(tableConfig.getFetchedColumns());
- split.setPrincipal(principal);
- split.setToken(token);
- split.setInstanceName(instance.getInstanceName());
- split.setZooKeepers(instance.getZooKeepers());
- split.setAuths(auths);
- split.setIterators(tableConfig.getIterators());
- split.setLogLevel(logLevel);
-
- splits.add(split);
- } else {
- // don't divide ranges
- ArrayList<String> locations = splitsToAdd.get(r);
- if (locations == null)
- locations = new ArrayList<String>(1);
- locations.add(location);
- splitsToAdd.put(r, locations);
- }
- }
- }
- }
-
- if (!autoAdjust)
- for (Map.Entry<Range,ArrayList<String>> entry : splitsToAdd.entrySet()) {
- RangeInputSplit split = new RangeInputSplit(tableName, tableId, entry.getKey(), entry.getValue().toArray(new String[0]));
-
- split.setOffline(tableConfig.isOfflineScan());
- split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
- split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
- split.setMockInstance(mockInstance);
- split.setFetchedColumns(tableConfig.getFetchedColumns());
- split.setPrincipal(principal);
- split.setToken(token);
- split.setInstanceName(instance.getInstanceName());
- split.setZooKeepers(instance.getZooKeepers());
- split.setAuths(auths);
- split.setIterators(tableConfig.getIterators());
- split.setLogLevel(logLevel);
-
- splits.add(split);
- }
- }
-
- return splits.toArray(new InputSplit[splits.size()]);
- }
-
-}
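
The getters above pull their values from the static configurator methods on the concrete input formats. A minimal sketch of that driver-side setup, assuming the standard ClientConfiguration builder (loadDefault/withInstance/withZkHosts) and a PasswordToken; the principal, password, instance name and ZooKeeper quorum below are placeholders, not values taken from this commit:

import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.mapred.JobConf;
import org.apache.log4j.Level;

public class InputConnectionSketch {
  public static void configure(JobConf job) throws AccumuloSecurityException {
    // Principal and serialized token are stored in the job configuration (Base64 encoded, not secure).
    AccumuloInputFormat.setConnectorInfo(job, "reader", new PasswordToken("secret"));
    // ZooKeeper-backed instance; instance name and quorum are placeholders.
    ClientConfiguration clientConf = ClientConfiguration.loadDefault()
        .withInstance("myInstance")
        .withZkHosts("zk1:2181,zk2:2181,zk3:2181");
    AccumuloInputFormat.setZooKeeperInstance(job, clientConf);
    // Scan authorizations must be a subset of the principal's authorizations.
    AccumuloInputFormat.setScanAuthorizations(job, new Authorizations("public"));
    // Optional: adjust logging for this job's input format classes.
    AccumuloInputFormat.setLogLevel(job, Level.INFO);
  }
}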
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
deleted file mode 100644
index 8a1d6df..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
-import org.apache.accumulo.core.conf.Property;
-import org.apache.accumulo.core.data.ArrayByteSequence;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.file.FileOperations;
-import org.apache.accumulo.core.file.FileSKVWriter;
-import org.apache.accumulo.core.security.ColumnVisibility;
-import org.apache.commons.collections.map.LRUMap;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordWriter;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.Progressable;
-import org.apache.log4j.Logger;
-
-/**
- * This class allows MapReduce jobs to write output in the Accumulo data file format.<br />
- * Care should be taken to write only sorted data (sorted by {@link Key}), as this is an important requirement of Accumulo data files.
- *
- * <p>
- * The output path to be created must be specified via {@link AccumuloFileOutputFormat#setOutputPath(JobConf, Path)}. This is inherited from
- * {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Other methods from {@link FileOutputFormat} are not supported and may be ignored or cause failures.
- * Using other Hadoop configuration options that affect the behavior of the underlying files directly in the Job's configuration may work, but are not directly
- * supported at this time.
- */
-public class AccumuloFileOutputFormat extends FileOutputFormat<Key,Value> {
-
- private static final Class<?> CLASS = AccumuloFileOutputFormat.class;
- protected static final Logger log = Logger.getLogger(CLASS);
-
- /**
- * This helper method provides an AccumuloConfiguration object constructed from the Accumulo defaults, and overridden with Accumulo properties that have been
- * stored in the Job's configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @since 1.5.0
- */
- protected static AccumuloConfiguration getAccumuloConfiguration(JobConf job) {
- return FileOutputConfigurator.getAccumuloConfiguration(CLASS, job);
- }
-
- /**
- * Sets the compression type to use for data blocks. Specifying a compression type may require additional libraries to be available to your Job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param compressionType
- * one of "none", "gz", "lzo", or "snappy"
- * @since 1.5.0
- */
- public static void setCompressionType(JobConf job, String compressionType) {
- FileOutputConfigurator.setCompressionType(CLASS, job, compressionType);
- }
-
- /**
- * Sets the size for data blocks within each file.<br />
- * Data blocks are a span of key/value pairs stored in the file that are compressed and indexed as a group.
- *
- * <p>
- * Making this value smaller may increase seek performance, but at the cost of increasing the size of the indexes (which can also affect seek performance).
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param dataBlockSize
- * the block size, in bytes
- * @since 1.5.0
- */
- public static void setDataBlockSize(JobConf job, long dataBlockSize) {
- FileOutputConfigurator.setDataBlockSize(CLASS, job, dataBlockSize);
- }
-
- /**
- * Sets the size for file blocks in the file system; file blocks are managed, and replicated, by the underlying file system.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param fileBlockSize
- * the block size, in bytes
- * @since 1.5.0
- */
- public static void setFileBlockSize(JobConf job, long fileBlockSize) {
- FileOutputConfigurator.setFileBlockSize(CLASS, job, fileBlockSize);
- }
-
- /**
- * Sets the size for index blocks within each file; smaller blocks mean a deeper index hierarchy within the file, while larger blocks mean a shallower
- * index hierarchy within the file. This can affect the performance of queries.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param indexBlockSize
- * the block size, in bytes
- * @since 1.5.0
- */
- public static void setIndexBlockSize(JobConf job, long indexBlockSize) {
- FileOutputConfigurator.setIndexBlockSize(CLASS, job, indexBlockSize);
- }
-
- /**
- * Sets the file system replication factor for the resulting file, overriding the file system default.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param replication
- * the number of replicas for produced files
- * @since 1.5.0
- */
- public static void setReplication(JobConf job, int replication) {
- FileOutputConfigurator.setReplication(CLASS, job, replication);
- }
-
- @Override
- public RecordWriter<Key,Value> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
- // get the path of the temporary output file
- final Configuration conf = job;
- final AccumuloConfiguration acuConf = getAccumuloConfiguration(job);
-
- final String extension = acuConf.get(Property.TABLE_FILE_TYPE);
- final Path file = new Path(getWorkOutputPath(job), getUniqueName(job, "part") + "." + extension);
-
- final LRUMap validVisibilities = new LRUMap(1000);
-
- return new RecordWriter<Key,Value>() {
- FileSKVWriter out = null;
-
- @Override
- public void close(Reporter reporter) throws IOException {
- if (out != null)
- out.close();
- }
-
- @Override
- public void write(Key key, Value value) throws IOException {
-
- Boolean wasChecked = (Boolean) validVisibilities.get(key.getColumnVisibilityData());
- if (wasChecked == null) {
- byte[] cv = key.getColumnVisibilityData().toArray();
- new ColumnVisibility(cv);
- validVisibilities.put(new ArrayByteSequence(Arrays.copyOf(cv, cv.length)), Boolean.TRUE);
- }
-
- if (out == null) {
- out = FileOperations.getInstance().openWriter(file.toString(), file.getFileSystem(conf), conf, acuConf);
- out.startDefaultLocalityGroup();
- }
- out.append(key, value);
- }
- };
- }
-
-}
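
A short sketch of wiring AccumuloFileOutputFormat into a bulk-ingest style job using the setters documented above; the output path, block sizes and replication factor are illustrative only, and the job must emit Key/Value pairs already sorted by Key:

import org.apache.accumulo.core.client.mapred.AccumuloFileOutputFormat;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class FileOutputConfigSketch {
  public static void configure(JobConf job) {
    job.setOutputFormat(AccumuloFileOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    // The output path is required; setOutputPath is inherited from FileOutputFormat.
    AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp/bulk-output"));
    // Optional tuning: "gz" is always available; "snappy" and "lzo" need native libraries on the cluster.
    AccumuloFileOutputFormat.setCompressionType(job, "gz");
    AccumuloFileOutputFormat.setDataBlockSize(job, 256 * 1024);  // 256 KB data blocks
    AccumuloFileOutputFormat.setIndexBlockSize(job, 128 * 1024); // 128 KB index blocks
    AccumuloFileOutputFormat.setReplication(job, 3);             // HDFS replication for produced files
  }
}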
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
deleted file mode 100644
index 18e286a..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import java.io.IOException;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.format.DefaultFormatter;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.log4j.Level;
-
-/**
- * This class allows MapReduce jobs to use Accumulo as the source of data. This {@link InputFormat} provides keys and values of type {@link Key} and
- * {@link Value} to the Map function.
- *
- * The user must specify the following via static configurator methods:
- *
- * <ul>
- * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, AuthenticationToken)}
- * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, String)}
- * <li>{@link AccumuloInputFormat#setScanAuthorizations(JobConf, Authorizations)}
- * <li>{@link AccumuloInputFormat#setZooKeeperInstance(JobConf, ClientConfiguration)} OR
- * {@link AccumuloInputFormat#setMockInstance(JobConf, String)}
- * </ul>
- *
- * Other static methods are optional.
- */
-public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
-
- @Override
- public RecordReader<Key,Value> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
- log.setLevel(getLogLevel(job));
-
- // Override the log level from the configuration as if the RangeInputSplit has one it's the more correct one to use.
- if (split instanceof org.apache.accumulo.core.client.mapreduce.RangeInputSplit) {
- org.apache.accumulo.core.client.mapreduce.RangeInputSplit risplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) split;
- Level level = risplit.getLogLevel();
- if (null != level) {
- log.setLevel(level);
- }
- }
-
- RecordReaderBase<Key,Value> recordReader = new RecordReaderBase<Key,Value>() {
-
- @Override
- public boolean next(Key key, Value value) throws IOException {
- if (scannerIterator.hasNext()) {
- ++numKeysRead;
- Entry<Key,Value> entry = scannerIterator.next();
- key.set(currentKey = entry.getKey());
- value.set(entry.getValue().get());
- if (log.isTraceEnabled())
- log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
- return true;
- }
- return false;
- }
-
- @Override
- public Key createKey() {
- return new Key();
- }
-
- @Override
- public Value createValue() {
- return new Value();
- }
-
- };
- recordReader.initialize(split, job);
- return recordReader;
- }
-}
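
A sketch of the table-scoped input settings for a mapred driver, layered on top of the connection setup shown earlier; the table name, range bounds, column family and iterator priority are placeholders, and RegExFilter is just one example of a server-side iterator that can be attached with addIterator:

import java.util.Collections;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.util.Pair;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class InputFormatDriverSketch {
  public static void configure(JobConf job) {
    job.setInputFormat(AccumuloInputFormat.class);
    // Connector, instance and scan authorizations would be set as in the earlier sketch.
    AccumuloInputFormat.setInputTableName(job, "mytable");
    // Scan only part of the table; with auto-adjust enabled the range is split along tablet boundaries.
    AccumuloInputFormat.setRanges(job, Collections.singleton(new Range("a", "m")));
    // Restrict the scan to one column family (null qualifier selects the whole family).
    AccumuloInputFormat.fetchColumns(job,
        Collections.singleton(new Pair<Text,Text>(new Text("meta"), null)));
    // Attach a server-side filter; the priority and name are arbitrary.
    IteratorSetting regex = new IteratorSetting(30, "rowFilter", RegExFilter.class);
    RegExFilter.setRegexs(regex, "row.*", null, null, null, false);
    AccumuloInputFormat.addIterator(job, regex);
  }
}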
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
deleted file mode 100644
index bbafef5..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormat.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.util.format.DefaultFormatter;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * This class allows MapReduce jobs to use multiple Accumulo tables as the source of data. This {@link org.apache.hadoop.mapred.InputFormat} provides keys and
- * values of type {@link Key} and {@link Value} to the Map function.
- *
- * The user must specify the following via static configurator methods:
- *
- * <ul>
- * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, org.apache.accumulo.core.client.security.tokens.AuthenticationToken)}
- * <li>{@link AccumuloInputFormat#setConnectorInfo(JobConf, String, String)}
- * <li>{@link AccumuloInputFormat#setScanAuthorizations(JobConf, org.apache.accumulo.core.security.Authorizations)}
- * <li>{@link AccumuloInputFormat#setZooKeeperInstance(JobConf, ClientConfiguration)} OR {@link AccumuloInputFormat#setMockInstance(JobConf, String)}
- * <li>{@link AccumuloMultiTableInputFormat#setInputTableConfigs(org.apache.hadoop.mapred.JobConf, java.util.Map)}
- * </ul>
- *
- * Other static methods are optional.
- */
-
-public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value> {
-
- /**
- * Sets the {@link InputTableConfig} objects on the given Hadoop configuration
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param configs
- * the table query configs to be set on the configuration.
- * @since 1.6.0
- */
- public static void setInputTableConfigs(JobConf job, Map<String,InputTableConfig> configs) {
- InputConfigurator.setInputTableConfigs(CLASS, job, configs);
- }
-
- @Override
- public RecordReader<Key,Value> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
- log.setLevel(getLogLevel(job));
- InputFormatBase.RecordReaderBase<Key,Value> recordReader = new InputFormatBase.RecordReaderBase<Key,Value>() {
-
- @Override
- public boolean next(Key key, Value value) throws IOException {
- if (scannerIterator.hasNext()) {
- ++numKeysRead;
- Map.Entry<Key,Value> entry = scannerIterator.next();
- key.set(currentKey = entry.getKey());
- value.set(entry.getValue().get());
- if (log.isTraceEnabled())
- log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
- return true;
- }
- return false;
- }
-
- @Override
- public Key createKey() {
- return new Key();
- }
-
- @Override
- public Value createValue() {
- return new Value();
- }
-
- };
- recordReader.initialize(split, job);
- return recordReader;
- }
-}
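
A sketch of feeding two tables into one job via setInputTableConfigs; the table names and range are placeholders, and the InputTableConfig.setRanges call is assumed to be the setter counterpart of the getRanges accessor used elsewhere in this class:

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.accumulo.core.client.mapred.AccumuloMultiTableInputFormat;
import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.mapred.JobConf;

public class MultiTableConfigSketch {
  public static void configure(JobConf job) {
    Map<String,InputTableConfig> configs = new HashMap<String,InputTableConfig>();

    // Scan all of table "events" with default settings.
    configs.put("events", new InputTableConfig());

    // Scan only a row range of table "users"; setRanges is assumed to mirror getRanges.
    InputTableConfig users = new InputTableConfig();
    users.setRanges(Collections.singletonList(new Range("a", "m")));
    configs.put("users", users);

    // Connector, instance and scan authorizations would be set as in the earlier sketches.
    job.setInputFormat(AccumuloMultiTableInputFormat.class);
    AccumuloMultiTableInputFormat.setInputTableConfigs(job, configs);
  }
}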
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormat.java
deleted file mode 100644
index 122b4cd..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormat.java
+++ /dev/null
@@ -1,539 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Instance;
-import org.apache.accumulo.core.client.MultiTableBatchWriter;
-import org.apache.accumulo.core.client.MutationsRejectedException;
-import org.apache.accumulo.core.client.TableExistsException;
-import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.client.ZooKeeperInstance;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.OutputConfigurator;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.SecurityErrorCode;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
-import org.apache.accumulo.core.data.ColumnUpdate;
-import org.apache.accumulo.core.data.KeyExtent;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.security.ColumnVisibility;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hadoop.mapred.RecordWriter;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.Progressable;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-
-/**
- * This class allows MapReduce jobs to use Accumulo as the sink for data. This {@link OutputFormat} accepts keys and values of type {@link Text} (for a table
- * name) and {@link Mutation} from the Map and Reduce functions.
- *
- * The user must specify the following via static configurator methods:
- *
- * <ul>
- * <li>{@link AccumuloOutputFormat#setConnectorInfo(JobConf, String, AuthenticationToken)}
- * <li>{@link AccumuloOutputFormat#setConnectorInfo(JobConf, String, String)}
- * <li>{@link AccumuloOutputFormat#setZooKeeperInstance(JobConf, ClientConfiguration)} OR {@link AccumuloOutputFormat#setMockInstance(JobConf, String)}
- * </ul>
- *
- * Other static methods are optional.
- */
-public class AccumuloOutputFormat implements OutputFormat<Text,Mutation> {
-
- private static final Class<?> CLASS = AccumuloOutputFormat.class;
- protected static final Logger log = Logger.getLogger(CLASS);
-
- /**
- * Sets the connector information needed to communicate with Accumulo in this job.
- *
- * <p>
- * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe
- * conversion to a string, and is not intended to be secure.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param principal
- * a valid Accumulo user name (user must have Table.CREATE permission if {@link #setCreateTables(JobConf, boolean)} is set to true)
- * @param token
- * the user's password
- * @since 1.5.0
- */
- public static void setConnectorInfo(JobConf job, String principal, AuthenticationToken token) throws AccumuloSecurityException {
- OutputConfigurator.setConnectorInfo(CLASS, job, principal, token);
- }
-
- /**
- * Sets the connector information needed to communicate with Accumulo in this job.
- *
- * <p>
- * Stores the password in a file in HDFS and pulls that into the Distributed Cache in an attempt to be more secure than storing it in the Configuration.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param principal
- * a valid Accumulo user name (user must have Table.CREATE permission if {@link #setCreateTables(JobConf, boolean)} is set to true)
- * @param tokenFile
- * the path to the password file
- * @since 1.6.0
- */
- public static void setConnectorInfo(JobConf job, String principal, String tokenFile) throws AccumuloSecurityException {
- OutputConfigurator.setConnectorInfo(CLASS, job, principal, tokenFile);
- }
-
- /**
- * Determines if the connector has been configured.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return true if the connector has been configured, false otherwise
- * @since 1.5.0
- * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
- */
- protected static Boolean isConnectorInfoSet(JobConf job) {
- return OutputConfigurator.isConnectorInfoSet(CLASS, job);
- }
-
- /**
- * Gets the principal from the configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return the user name
- * @since 1.5.0
- * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
- */
- protected static String getPrincipal(JobConf job) {
- return OutputConfigurator.getPrincipal(CLASS, job);
- }
-
- /**
- * Gets the serialized token class from either the configuration or the token file.
- *
- * @since 1.5.0
- * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobConf)} instead.
- */
- @Deprecated
- protected static String getTokenClass(JobConf job) {
- return getAuthenticationToken(job).getClass().getName();
- }
-
- /**
- * Gets the serialized token from either the configuration or the token file.
- *
- * @since 1.5.0
- * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobConf)} instead.
- */
- @Deprecated
- protected static byte[] getToken(JobConf job) {
- return AuthenticationTokenSerializer.serialize(getAuthenticationToken(job));
- }
-
- /**
- * Gets the authenticated token from either the specified token file or directly from the configuration, whichever was used when the job was configured.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @return the principal's authentication token
- * @since 1.6.0
- * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
- * @see #setConnectorInfo(JobConf, String, String)
- */
- protected static AuthenticationToken getAuthenticationToken(JobConf job) {
- return OutputConfigurator.getAuthenticationToken(CLASS, job);
- }
-
- /**
- * Configures a {@link ZooKeeperInstance} for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param clientConfig
- * client configuration for specifying connection timeouts, SSL connection options, etc.
- * @since 1.6.0
- */
- public static void setZooKeeperInstance(JobConf job, ClientConfiguration clientConfig) {
- OutputConfigurator.setZooKeeperInstance(CLASS, job, clientConfig);
- }
-
- /**
- * Configures a {@link MockInstance} for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param instanceName
- * the Accumulo instance name
- * @since 1.5.0
- */
- public static void setMockInstance(JobConf job, String instanceName) {
- OutputConfigurator.setMockInstance(CLASS, job, instanceName);
- }
-
- /**
- * Initializes an Accumulo {@link Instance} based on the configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return an Accumulo instance
- * @since 1.5.0
- * @see #setZooKeeperInstance(JobConf, ClientConfiguration)
- * @see #setMockInstance(JobConf, String)
- */
- protected static Instance getInstance(JobConf job) {
- return OutputConfigurator.getInstance(CLASS, job);
- }
-
- /**
- * Sets the log level for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param level
- * the logging level
- * @since 1.5.0
- */
- public static void setLogLevel(JobConf job, Level level) {
- OutputConfigurator.setLogLevel(CLASS, job, level);
- }
-
- /**
- * Gets the log level from this configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return the log level
- * @since 1.5.0
- * @see #setLogLevel(JobConf, Level)
- */
- protected static Level getLogLevel(JobConf job) {
- return OutputConfigurator.getLogLevel(CLASS, job);
- }
-
- /**
- * Sets the default table name to use if one emits a null in place of a table name for a given mutation. Table names may only contain alphanumeric characters
- * and underscores.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param tableName
- * the table to use when the table name is null in the write call
- * @since 1.5.0
- */
- public static void setDefaultTableName(JobConf job, String tableName) {
- OutputConfigurator.setDefaultTableName(CLASS, job, tableName);
- }
-
- /**
- * Gets the default table name from the configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return the default table name
- * @since 1.5.0
- * @see #setDefaultTableName(JobConf, String)
- */
- protected static String getDefaultTableName(JobConf job) {
- return OutputConfigurator.getDefaultTableName(CLASS, job);
- }
-
- /**
- * Sets the configuration for the job's {@link BatchWriter} instances. If not set, a new {@link BatchWriterConfig} with sensible built-in defaults is
- * used. Setting the configuration multiple times overwrites any previous configuration.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param bwConfig
- * the configuration for the {@link BatchWriter}
- * @since 1.5.0
- */
- public static void setBatchWriterOptions(JobConf job, BatchWriterConfig bwConfig) {
- OutputConfigurator.setBatchWriterOptions(CLASS, job, bwConfig);
- }
-
- /**
- * Gets the {@link BatchWriterConfig} settings.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return the configuration object
- * @since 1.5.0
- * @see #setBatchWriterOptions(JobConf, BatchWriterConfig)
- */
- protected static BatchWriterConfig getBatchWriterOptions(JobConf job) {
- return OutputConfigurator.getBatchWriterOptions(CLASS, job);
- }
-
- /**
- * Sets the directive to create new tables as necessary. Table names may only contain alphanumeric characters and underscores.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.5.0
- */
- public static void setCreateTables(JobConf job, boolean enableFeature) {
- OutputConfigurator.setCreateTables(CLASS, job, enableFeature);
- }
-
- /**
- * Determines whether tables are permitted to be created as needed.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return true if the feature is enabled, false otherwise
- * @since 1.5.0
- * @see #setCreateTables(JobConf, boolean)
- */
- protected static Boolean canCreateTables(JobConf job) {
- return OutputConfigurator.canCreateTables(CLASS, job);
- }
-
- /**
- * Sets the directive to use simulation mode for this job. In simulation mode, no output is produced. This is useful for testing.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.5.0
- */
- public static void setSimulationMode(JobConf job, boolean enableFeature) {
- OutputConfigurator.setSimulationMode(CLASS, job, enableFeature);
- }
-
- /**
- * Determines whether this feature is enabled.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return true if the feature is enabled, false otherwise
- * @since 1.5.0
- * @see #setSimulationMode(JobConf, boolean)
- */
- protected static Boolean getSimulationMode(JobConf job) {
- return OutputConfigurator.getSimulationMode(CLASS, job);
- }
-
- /**
- * A base class to be used to create {@link RecordWriter} instances that write to Accumulo.
- */
- protected static class AccumuloRecordWriter implements RecordWriter<Text,Mutation> {
- private MultiTableBatchWriter mtbw = null;
- private HashMap<Text,BatchWriter> bws = null;
- private Text defaultTableName = null;
-
- private boolean simulate = false;
- private boolean createTables = false;
-
- private long mutCount = 0;
- private long valCount = 0;
-
- private Connector conn;
-
- protected AccumuloRecordWriter(JobConf job) throws AccumuloException, AccumuloSecurityException, IOException {
- Level l = getLogLevel(job);
- if (l != null)
- log.setLevel(getLogLevel(job));
- this.simulate = getSimulationMode(job);
- this.createTables = canCreateTables(job);
-
- if (simulate)
- log.info("Simulating output only. No writes to tables will occur");
-
- this.bws = new HashMap<Text,BatchWriter>();
-
- String tname = getDefaultTableName(job);
- this.defaultTableName = (tname == null) ? null : new Text(tname);
-
- if (!simulate) {
- this.conn = getInstance(job).getConnector(getPrincipal(job), getAuthenticationToken(job));
- mtbw = conn.createMultiTableBatchWriter(getBatchWriterOptions(job));
- }
- }
-
- /**
- * Push a mutation into a table. If table is null, the default table will be used. If createTables is set, the table will be created if it does not exist.
- * The table name must only contain alphanumeric characters and underscores.
- */
- @Override
- public void write(Text table, Mutation mutation) throws IOException {
- if (table == null || table.toString().isEmpty())
- table = this.defaultTableName;
-
- if (!simulate && table == null)
- throw new IOException("No table or default table specified. Try simulation mode next time");
-
- ++mutCount;
- valCount += mutation.size();
- printMutation(table, mutation);
-
- if (simulate)
- return;
-
- if (!bws.containsKey(table))
- try {
- addTable(table);
- } catch (Exception e) {
- e.printStackTrace();
- throw new IOException(e);
- }
-
- try {
- bws.get(table).addMutation(mutation);
- } catch (MutationsRejectedException e) {
- throw new IOException(e);
- }
- }
-
- public void addTable(Text tableName) throws AccumuloException, AccumuloSecurityException {
- if (simulate) {
- log.info("Simulating adding table: " + tableName);
- return;
- }
-
- log.debug("Adding table: " + tableName);
- BatchWriter bw = null;
- String table = tableName.toString();
-
- if (createTables && !conn.tableOperations().exists(table)) {
- try {
- conn.tableOperations().create(table);
- } catch (AccumuloSecurityException e) {
- log.error("Accumulo security violation creating " + table, e);
- throw e;
- } catch (TableExistsException e) {
- // Shouldn't happen
- }
- }
-
- try {
- bw = mtbw.getBatchWriter(table);
- } catch (TableNotFoundException e) {
- log.error("Accumulo table " + table + " doesn't exist and cannot be created.", e);
- throw new AccumuloException(e);
- } catch (AccumuloException e) {
- throw e;
- } catch (AccumuloSecurityException e) {
- throw e;
- }
-
- if (bw != null)
- bws.put(tableName, bw);
- }
-
- private int printMutation(Text table, Mutation m) {
- if (log.isTraceEnabled()) {
- log.trace(String.format("Table %s row key: %s", table, hexDump(m.getRow())));
- for (ColumnUpdate cu : m.getUpdates()) {
- log.trace(String.format("Table %s column: %s:%s", table, hexDump(cu.getColumnFamily()), hexDump(cu.getColumnQualifier())));
- log.trace(String.format("Table %s security: %s", table, new ColumnVisibility(cu.getColumnVisibility()).toString()));
- log.trace(String.format("Table %s value: %s", table, hexDump(cu.getValue())));
- }
- }
- return m.getUpdates().size();
- }
-
- private String hexDump(byte[] ba) {
- StringBuilder sb = new StringBuilder();
- for (byte b : ba) {
- if ((b > 0x20) && (b < 0x7e))
- sb.append((char) b);
- else
- sb.append(String.format("x%02x", b));
- }
- return sb.toString();
- }
-
- @Override
- public void close(Reporter reporter) throws IOException {
- log.debug("mutations written: " + mutCount + ", values written: " + valCount);
- if (simulate)
- return;
-
- try {
- mtbw.close();
- } catch (MutationsRejectedException e) {
- if (e.getAuthorizationFailuresMap().size() > 0) {
- HashMap<String,Set<SecurityErrorCode>> tables = new HashMap<String,Set<SecurityErrorCode>>();
- for (Entry<KeyExtent,Set<SecurityErrorCode>> ke : e.getAuthorizationFailuresMap().entrySet()) {
- Set<SecurityErrorCode> secCodes = tables.get(ke.getKey().getTableId().toString());
- if (secCodes == null) {
- secCodes = new HashSet<SecurityErrorCode>();
- tables.put(ke.getKey().getTableId().toString(), secCodes);
- }
- secCodes.addAll(ke.getValue());
- }
-
- log.error("Not authorized to write to tables : " + tables);
- }
-
- if (e.getConstraintViolationSummaries().size() > 0) {
- log.error("Constraint violations : " + e.getConstraintViolationSummaries().size());
- }
- }
- }
- }
-
- @Override
- public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
- if (!isConnectorInfoSet(job))
- throw new IOException("Connector info has not been set.");
- try {
- // if the instance isn't configured, it will complain here
- String principal = getPrincipal(job);
- AuthenticationToken token = getAuthenticationToken(job);
- Connector c = getInstance(job).getConnector(principal, token);
- if (!c.securityOperations().authenticateUser(principal, token))
- throw new IOException("Unable to authenticate user");
- } catch (AccumuloException e) {
- throw new IOException(e);
- } catch (AccumuloSecurityException e) {
- throw new IOException(e);
- }
- }
-
- @Override
- public RecordWriter<Text,Mutation> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
- try {
- return new AccumuloRecordWriter(job);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
-}
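
A sketch of the write side: the driver enables table creation and a default table, and an old-API reducer emits one Mutation per key; the table name, column family and batch writer memory are placeholders, and connector/instance setup mirrors the earlier input sketches:

import java.io.IOException;
import java.util.Iterator;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.mapred.AccumuloOutputFormat;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class OutputFormatSketch {

  public static void configure(JobConf job) {
    job.setOutputFormat(AccumuloOutputFormat.class);
    // Connector and instance would be set as in the earlier sketches.
    AccumuloOutputFormat.setDefaultTableName(job, "counts");  // used when the emitted table name is null
    AccumuloOutputFormat.setCreateTables(job, true);          // requires Table.CREATE permission
    // Illustrative batch writer tuning; defaults are used when this is not set.
    AccumuloOutputFormat.setBatchWriterOptions(job, new BatchWriterConfig().setMaxMemory(10 * 1000 * 1000L));
  }

  /** Reducer that sums counts for a word and writes one Mutation per key to the default table. */
  public static class SumReducer extends MapReduceBase implements Reducer<Text,LongWritable,Text,Mutation> {
    @Override
    public void reduce(Text word, Iterator<LongWritable> counts, OutputCollector<Text,Mutation> output, Reporter reporter) throws IOException {
      long sum = 0;
      while (counts.hasNext())
        sum += counts.next().get();
      Mutation m = new Mutation(word);
      m.put(new Text("count"), new Text(""), new Value(Long.toString(sum).getBytes()));
      output.collect(null, m); // null table name -> default table
    }
  }
}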
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormat.java
deleted file mode 100644
index 673c5b8..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormat.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import java.io.IOException;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.RowIterator;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.PeekingIterator;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * This class allows MapReduce jobs to use Accumulo as the source of data. This {@link InputFormat} provides row names as {@link Text} as keys, and a
- * corresponding {@link PeekingIterator} as a value, which in turn makes the {@link Key}/{@link Value} pairs for that row available to the Map function.
- *
- * The user must specify the following via static configurator methods:
- *
- * <ul>
- * <li>{@link AccumuloRowInputFormat#setConnectorInfo(JobConf, String, AuthenticationToken)}
- * <li>{@link AccumuloRowInputFormat#setInputTableName(JobConf, String)}
- * <li>{@link AccumuloRowInputFormat#setScanAuthorizations(JobConf, Authorizations)}
- * <li>{@link AccumuloRowInputFormat#setZooKeeperInstance(JobConf, ClientConfiguration)} OR {@link AccumuloRowInputFormat#setMockInstance(JobConf, String)}
- * </ul>
- *
- * Other static methods are optional.
- */
-public class AccumuloRowInputFormat extends InputFormatBase<Text,PeekingIterator<Entry<Key,Value>>> {
- @Override
- public RecordReader<Text,PeekingIterator<Entry<Key,Value>>> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
- log.setLevel(getLogLevel(job));
- RecordReaderBase<Text,PeekingIterator<Entry<Key,Value>>> recordReader = new RecordReaderBase<Text,PeekingIterator<Entry<Key,Value>>>() {
- RowIterator rowIterator;
-
- @Override
- public void initialize(InputSplit inSplit, JobConf job) throws IOException {
- super.initialize(inSplit, job);
- rowIterator = new RowIterator(scannerIterator);
- }
-
- @Override
- public boolean next(Text key, PeekingIterator<Entry<Key,Value>> value) throws IOException {
- if (!rowIterator.hasNext())
- return false;
- value.initialize(rowIterator.next());
- numKeysRead = rowIterator.getKVCount();
- key.set((currentKey = value.peek().getKey()).getRow());
- return true;
- }
-
- @Override
- public Text createKey() {
- return new Text();
- }
-
- @Override
- public PeekingIterator<Entry<Key,Value>> createValue() {
- return new PeekingIterator<Entry<Key,Value>>();
- }
- };
- recordReader.initialize(split, job);
- return recordReader;
- }
-}
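
A sketch of a mapper consuming whole rows from AccumuloRowInputFormat, counting the key/value pairs in each row; driver-side configuration (connector, instance, input table) follows the same pattern as the AccumuloInputFormat sketch above:

import java.io.IOException;
import java.util.Map.Entry;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.util.PeekingIterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/** Emits (row, number of key/value pairs in that row) for each row read by AccumuloRowInputFormat. */
public class RowSizeMapper extends MapReduceBase
    implements Mapper<Text,PeekingIterator<Entry<Key,Value>>,Text,LongWritable> {
  @Override
  public void map(Text row, PeekingIterator<Entry<Key,Value>> columns, OutputCollector<Text,LongWritable> output, Reporter reporter) throws IOException {
    long count = 0;
    while (columns.hasNext()) {
      columns.next();
      count++;
    }
    output.collect(row, new LongWritable(count));
  }
}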
[11/12] ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
deleted file mode 100644
index 0cee355..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.accumulo.core.client.ClientSideIteratorScanner;
-import org.apache.accumulo.core.client.IsolatedScanner;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.client.impl.TabletLocator;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * This abstract {@link InputFormat} class allows MapReduce jobs to use Accumulo as the source of K,V pairs.
- * <p>
- * Subclasses must implement a {@link #getRecordReader(InputSplit, JobConf, Reporter)} to provide a {@link RecordReader} for K,V.
- * <p>
- * A static base class, RecordReaderBase, is provided to retrieve Accumulo {@link Key}/{@link Value} pairs, but one must implement its
- * {@link RecordReaderBase#next(Object, Object)} to transform them to the desired generic types K,V.
- * <p>
- * See {@link AccumuloInputFormat} for an example implementation.
- */
-public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
-
- /**
- * Sets the name of the input table, over which this job will scan.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param tableName
- * the name of the input table from which this job will read
- * @since 1.5.0
- */
- public static void setInputTableName(JobConf job, String tableName) {
- InputConfigurator.setInputTableName(CLASS, job, tableName);
- }
-
- /**
- * Gets the table name from the configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return the table name
- * @since 1.5.0
- * @see #setInputTableName(JobConf, String)
- */
- protected static String getInputTableName(JobConf job) {
- return InputConfigurator.getInputTableName(CLASS, job);
- }
-
- /**
- * Sets the input ranges to scan for this job. If not set, the entire table will be scanned.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param ranges
- * the ranges that will be mapped over
- * @since 1.5.0
- */
- public static void setRanges(JobConf job, Collection<Range> ranges) {
- InputConfigurator.setRanges(CLASS, job, ranges);
- }
-
- /**
- * Gets the ranges to scan over from a job.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return the ranges
- * @throws IOException
- * if the ranges have been encoded improperly
- * @since 1.5.0
- * @see #setRanges(JobConf, Collection)
- */
- protected static List<Range> getRanges(JobConf job) throws IOException {
- return InputConfigurator.getRanges(CLASS, job);
- }
-
- /**
- * Restricts the columns that will be mapped over for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param columnFamilyColumnQualifierPairs
- * a pair of {@link Text} objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is
- * selected. An empty set is the default and is equivalent to scanning all columns.
- * @since 1.5.0
- */
- public static void fetchColumns(JobConf job, Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
- InputConfigurator.fetchColumns(CLASS, job, columnFamilyColumnQualifierPairs);
- }
-
- /**
- * Gets the columns to be mapped over from this job.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return a set of columns
- * @since 1.5.0
- * @see #fetchColumns(JobConf, Collection)
- */
- protected static Set<Pair<Text,Text>> getFetchedColumns(JobConf job) {
- return InputConfigurator.getFetchedColumns(CLASS, job);
- }
-
- /**
- * Encode an iterator on the input for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param cfg
- * the configuration of the iterator
- * @since 1.5.0
- */
- public static void addIterator(JobConf job, IteratorSetting cfg) {
- InputConfigurator.addIterator(CLASS, job, cfg);
- }
-
- /**
- * Gets a list of the iterator settings (for iterators to apply to a scanner) from this configuration.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return a list of iterators
- * @since 1.5.0
- * @see #addIterator(JobConf, IteratorSetting)
- */
- protected static List<IteratorSetting> getIterators(JobConf job) {
- return InputConfigurator.getIterators(CLASS, job);
- }
-
- /**
- * Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
- * Disabling this feature will cause exactly one Map task to be created for each specified range.
- *
- * <p>
- * By default, this feature is <b>enabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @see #setRanges(JobConf, Collection)
- * @since 1.5.0
- */
- public static void setAutoAdjustRanges(JobConf job, boolean enableFeature) {
- InputConfigurator.setAutoAdjustRanges(CLASS, job, enableFeature);
- }
-
- /**
- * Determines whether a configuration has auto-adjust ranges enabled.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return false if the feature is disabled, true otherwise
- * @since 1.5.0
- * @see #setAutoAdjustRanges(JobConf, boolean)
- */
- protected static boolean getAutoAdjustRanges(JobConf job) {
- return InputConfigurator.getAutoAdjustRanges(CLASS, job);
- }
-
- /**
- * Controls the use of the {@link IsolatedScanner} in this job.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.5.0
- */
- public static void setScanIsolation(JobConf job, boolean enableFeature) {
- InputConfigurator.setScanIsolation(CLASS, job, enableFeature);
- }
-
- /**
- * Determines whether a configuration has isolation enabled.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return true if the feature is enabled, false otherwise
- * @since 1.5.0
- * @see #setScanIsolation(JobConf, boolean)
- */
- protected static boolean isIsolated(JobConf job) {
- return InputConfigurator.isIsolated(CLASS, job);
- }
-
- /**
- * Controls the use of the {@link ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack to be constructed within the Map
- * task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be available on the classpath for the task.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.5.0
- */
- public static void setLocalIterators(JobConf job, boolean enableFeature) {
- InputConfigurator.setLocalIterators(CLASS, job, enableFeature);
- }
-
- /**
- * Determines whether a configuration uses local iterators.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return true if the feature is enabled, false otherwise
- * @since 1.5.0
- * @see #setLocalIterators(JobConf, boolean)
- */
- protected static boolean usesLocalIterators(JobConf job) {
- return InputConfigurator.usesLocalIterators(CLASS, job);
- }
-
- /**
- * <p>
- * Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
- * table's files. If the table is not offline, then the job will fail. If the table comes online during the map reduce job, it is likely that the job will
- * fail.
- *
- * <p>
- * To use this option, the map reduce user will need access to read the Accumulo directory in HDFS.
- *
- * <p>
- * Reading the offline table will create the scan time iterator stack in the map process. So any iterators that are configured for the table will need to be
- * on the mapper's classpath.
- *
- * <p>
- * One way to use this feature is to clone a table, take the clone offline, and use the clone as the input table for a map reduce job. If you plan to map
- * reduce over the data many times, it may be better to compact the table, clone it, take it offline, and use the clone for all map reduce jobs. The
- * reason to do this is that compaction will reduce each tablet in the table to one file, and it is faster to read from one file.
- *
- * <p>
- * There are two possible advantages to reading a table's files directly out of HDFS. First, you may see better read performance. Second, it will support
- * speculative execution better. When reading an online table, speculative execution can put more load on an already slow tablet server.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.5.0
- */
- public static void setOfflineTableScan(JobConf job, boolean enableFeature) {
- InputConfigurator.setOfflineTableScan(CLASS, job, enableFeature);
- }
-
- /**
- * Determines whether a configuration has the offline table scan feature enabled.
- *
- * @param job
- * the Hadoop context for the configured job
- * @return true if the feature is enabled, false otherwise
- * @since 1.5.0
- * @see #setOfflineTableScan(JobConf, boolean)
- */
- protected static boolean isOfflineScan(JobConf job) {
- return InputConfigurator.isOfflineScan(CLASS, job);
- }
-
- /**
- * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
- *
- * @param job
- *          the Hadoop context for the configured job
- * @return an Accumulo tablet locator
- * @throws org.apache.accumulo.core.client.TableNotFoundException
- * if the table name set on the job doesn't exist
- * @since 1.5.0
- * @deprecated since 1.6.0
- */
- @Deprecated
- protected static TabletLocator getTabletLocator(JobConf job) throws TableNotFoundException {
- return InputConfigurator.getTabletLocator(CLASS, job, InputConfigurator.getInputTableName(CLASS, job));
- }
-
- protected abstract static class RecordReaderBase<K,V> extends AbstractRecordReader<K,V> {
-
- @Override
- protected void setupIterators(JobConf job, Scanner scanner, String tableName, org.apache.accumulo.core.client.mapred.RangeInputSplit split) {
- List<IteratorSetting> iterators = null;
-
- if (null == split) {
- iterators = getIterators(job);
- } else {
- iterators = split.getIterators();
- if (null == iterators) {
- iterators = getIterators(job);
- }
- }
-
- setupIterators(iterators, scanner);
- }
-
- /**
- * Apply the configured iterators to the scanner.
- *
- * @param iterators
- * the iterators to set
- * @param scanner
- * the scanner to configure
- */
- protected void setupIterators(List<IteratorSetting> iterators, Scanner scanner) {
- for (IteratorSetting iterator : iterators) {
- scanner.addScanIterator(iterator);
- }
- }
-
- /**
- * Apply the configured iterators from the configuration to the scanner.
- *
- * @param job
- * the job configuration
- * @param scanner
- * the scanner to configure
- */
- @Deprecated
- protected void setupIterators(JobConf job, Scanner scanner) {
- setupIterators(getIterators(job), scanner);
- }
- }
-
- /**
- * @deprecated since 1.5.2; Use {@link org.apache.accumulo.core.client.mapred.RangeInputSplit} instead.
- * @see org.apache.accumulo.core.client.mapred.RangeInputSplit
- */
- @Deprecated
- public static class RangeInputSplit extends org.apache.accumulo.core.client.mapred.RangeInputSplit {
- public RangeInputSplit() {
- super();
- }
-
- public RangeInputSplit(RangeInputSplit other) throws IOException {
- super(other);
- }
-
- public RangeInputSplit(String table, String tableId, Range range, String[] locations) {
- super(table, tableId, range, locations);
- }
-
- protected RangeInputSplit(String table, Range range, String[] locations) {
- super(table, "", range, locations);
- }
- }
-}
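For reference, a minimal sketch of driving the scan-scoping methods documented above on a JobConf. It assumes the old-API org.apache.accumulo.core.client.mapred.AccumuloInputFormat subclass, and that connector, instance, and authorization information are configured elsewhere; the table, range, column, and iterator names are placeholders.

    import java.util.Collections;

    import org.apache.accumulo.core.client.IteratorSetting;
    import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.util.Pair;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;

    public class MapredScanOptionsSketch {
      static void configureScan(JobConf job) {
        // table to read; the entire table is scanned unless ranges are set
        AccumuloInputFormat.setInputTableName(job, "mytable");
        // restrict the scan to a single row range and one column family
        AccumuloInputFormat.setRanges(job, Collections.singleton(new Range("row_a", "row_z")));
        AccumuloInputFormat.fetchColumns(job,
            Collections.singleton(new Pair<Text,Text>(new Text("cf"), null)));
        // push an extra iterator onto the scan-time stack
        AccumuloInputFormat.addIterator(job, new IteratorSetting(50, "vers",
            "org.apache.accumulo.core.iterators.user.VersioningIterator"));
        // merge and split ranges along tablet boundaries (the default)
        AccumuloInputFormat.setAutoAdjustRanges(job, true);
      }
    }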
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapred/RangeInputSplit.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/RangeInputSplit.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/RangeInputSplit.java
deleted file mode 100644
index 3fd2ab0..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/RangeInputSplit.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import java.io.IOException;
-
-import org.apache.accumulo.core.data.Range;
-import org.apache.hadoop.mapred.InputSplit;
-
-/**
- * The Class RangeInputSplit. Encapsulates an Accumulo range for use in Map Reduce jobs.
- */
-public class RangeInputSplit extends org.apache.accumulo.core.client.mapreduce.RangeInputSplit implements InputSplit {
-
- public RangeInputSplit() {
- super();
- }
-
- public RangeInputSplit(RangeInputSplit split) throws IOException {
- super(split);
- }
-
- protected RangeInputSplit(String table, String tableId, Range range, String[] locations) {
- super(table, tableId, range, locations);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
deleted file mode 100644
index 836cff9..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.ClientSideIteratorScanner;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Instance;
-import org.apache.accumulo.core.client.IsolatedScanner;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.TableDeletedException;
-import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.client.TableOfflineException;
-import org.apache.accumulo.core.client.impl.OfflineScanner;
-import org.apache.accumulo.core.client.impl.ScannerImpl;
-import org.apache.accumulo.core.client.impl.Tables;
-import org.apache.accumulo.core.client.impl.TabletLocator;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.KeyExtent;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.master.state.tables.TableState;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.security.Credentials;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.accumulo.core.util.UtilWaitThread;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-
-/**
- * An abstract input format to provide shared methods common to all other input format classes. At the very least, any classes inheriting from this class will
- * need to define their own {@link RecordReader}.
- */
-public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
-
- protected static final Class<?> CLASS = AccumuloInputFormat.class;
- protected static final Logger log = Logger.getLogger(CLASS);
-
- /**
- * Sets the connector information needed to communicate with Accumulo in this job.
- *
- * <p>
- * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe
- * conversion to a string, and is not intended to be secure.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param principal
- *          a valid Accumulo user name (the user must be able to read the configured input table)
- * @param token
- * the user's password
- * @since 1.5.0
- */
- public static void setConnectorInfo(Job job, String principal, AuthenticationToken token) throws AccumuloSecurityException {
- InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
- }
-
- /**
- * Sets the connector information needed to communicate with Accumulo in this job.
- *
- * <p>
- * Stores the password in a file in HDFS and pulls that into the Distributed Cache in an attempt to be more secure than storing it in the Configuration.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param principal
- *          a valid Accumulo user name (the user must be able to read the configured input table)
- * @param tokenFile
- * the path to the token file
- * @since 1.6.0
- */
- public static void setConnectorInfo(Job job, String principal, String tokenFile) throws AccumuloSecurityException {
- InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, tokenFile);
- }
-
- /**
- * Determines if the connector has been configured.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return true if the connector has been configured, false otherwise
- * @since 1.5.0
- * @see #setConnectorInfo(Job, String, AuthenticationToken)
- */
- protected static Boolean isConnectorInfoSet(JobContext context) {
- return InputConfigurator.isConnectorInfoSet(CLASS, getConfiguration(context));
- }
-
- /**
- * Gets the user name from the configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the user name
- * @since 1.5.0
- * @see #setConnectorInfo(Job, String, AuthenticationToken)
- */
- protected static String getPrincipal(JobContext context) {
- return InputConfigurator.getPrincipal(CLASS, getConfiguration(context));
- }
-
- /**
- * Gets the serialized token class from either the configuration or the token file.
- *
- * @since 1.5.0
- * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobContext)} instead.
- */
- @Deprecated
- protected static String getTokenClass(JobContext context) {
- return getAuthenticationToken(context).getClass().getName();
- }
-
- /**
- * Gets the serialized token from either the configuration or the token file.
- *
- * @since 1.5.0
- * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobContext)} instead.
- */
- @Deprecated
- protected static byte[] getToken(JobContext context) {
- return AuthenticationToken.AuthenticationTokenSerializer.serialize(getAuthenticationToken(context));
- }
-
- /**
- * Gets the authenticated token from either the specified token file or directly from the configuration, whichever was used when the job was configured.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the principal's authentication token
- * @since 1.6.0
- * @see #setConnectorInfo(Job, String, AuthenticationToken)
- * @see #setConnectorInfo(Job, String, String)
- */
- protected static AuthenticationToken getAuthenticationToken(JobContext context) {
- return InputConfigurator.getAuthenticationToken(CLASS, getConfiguration(context));
- }
-
- /**
- * Configures a {@link org.apache.accumulo.core.client.ZooKeeperInstance} for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param clientConfig
- * client configuration containing connection options
- * @since 1.6.0
- */
- public static void setZooKeeperInstance(Job job, ClientConfiguration clientConfig) {
- InputConfigurator.setZooKeeperInstance(CLASS, job.getConfiguration(), clientConfig);
- }
-
- /**
- * Configures a {@link org.apache.accumulo.core.client.mock.MockInstance} for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param instanceName
- * the Accumulo instance name
- * @since 1.5.0
- */
- public static void setMockInstance(Job job, String instanceName) {
- InputConfigurator.setMockInstance(CLASS, job.getConfiguration(), instanceName);
- }
-
- /**
- * Initializes an Accumulo {@link org.apache.accumulo.core.client.Instance} based on the configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return an Accumulo instance
- * @since 1.5.0
- * @see #setZooKeeperInstance(Job, ClientConfiguration)
- * @see #setMockInstance(Job, String)
- */
- protected static Instance getInstance(JobContext context) {
- return InputConfigurator.getInstance(CLASS, getConfiguration(context));
- }
-
- /**
- * Sets the log level for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param level
- * the logging level
- * @since 1.5.0
- */
- public static void setLogLevel(Job job, Level level) {
- InputConfigurator.setLogLevel(CLASS, job.getConfiguration(), level);
- }
-
- /**
- * Gets the log level from this configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the log level
- * @since 1.5.0
- * @see #setLogLevel(Job, Level)
- */
- protected static Level getLogLevel(JobContext context) {
- return InputConfigurator.getLogLevel(CLASS, getConfiguration(context));
- }
-
- /**
- * Sets the {@link org.apache.accumulo.core.security.Authorizations} used to scan. Must be a subset of the user's authorization. Defaults to the empty set.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param auths
- * the user's authorizations
- */
- public static void setScanAuthorizations(Job job, Authorizations auths) {
- InputConfigurator.setScanAuthorizations(CLASS, job.getConfiguration(), auths);
- }
-
- /**
- * Gets the authorizations to set for the scans from the configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the Accumulo scan authorizations
- * @since 1.5.0
- * @see #setScanAuthorizations(Job, Authorizations)
- */
- protected static Authorizations getScanAuthorizations(JobContext context) {
- return InputConfigurator.getScanAuthorizations(CLASS, getConfiguration(context));
- }
-
- /**
- * Fetches all {@link InputTableConfig}s that have been set on the given job.
- *
- * @param context
- * the Hadoop job instance to be configured
- * @return the {@link InputTableConfig} objects for the job
- * @since 1.6.0
- */
- protected static Map<String,InputTableConfig> getInputTableConfigs(JobContext context) {
- return InputConfigurator.getInputTableConfigs(CLASS, getConfiguration(context));
- }
-
- /**
- * Fetches a {@link InputTableConfig} that has been set on the configuration for a specific table.
- *
- * <p>
- * null is returned in the event that the table doesn't exist.
- *
- * @param context
- * the Hadoop job instance to be configured
- * @param tableName
- * the table name for which to grab the config object
- * @return the {@link InputTableConfig} for the given table
- * @since 1.6.0
- */
- protected static InputTableConfig getInputTableConfig(JobContext context, String tableName) {
- return InputConfigurator.getInputTableConfig(CLASS, getConfiguration(context), tableName);
- }
-
- /**
- * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @param table
- * the table for which to initialize the locator
- * @return an Accumulo tablet locator
- * @throws org.apache.accumulo.core.client.TableNotFoundException
- * if the table name set on the configuration doesn't exist
- * @since 1.6.0
- */
- protected static TabletLocator getTabletLocator(JobContext context, String table) throws TableNotFoundException {
- return InputConfigurator.getTabletLocator(CLASS, getConfiguration(context), table);
- }
-
- // InputFormat doesn't have the equivalent of OutputFormat's checkOutputSpecs(JobContext job)
- /**
- * Check whether a configuration is fully configured to be used with an Accumulo {@link org.apache.hadoop.mapreduce.InputFormat}.
- *
- * @param context
- * the Hadoop context for the configured job
- * @throws java.io.IOException
- * if the context is improperly configured
- * @since 1.5.0
- */
- protected static void validateOptions(JobContext context) throws IOException {
- InputConfigurator.validateOptions(CLASS, getConfiguration(context));
- }
-
- /**
- * An abstract base class to be used to create {@link org.apache.hadoop.mapreduce.RecordReader} instances that convert from Accumulo
- * {@link org.apache.accumulo.core.data.Key}/{@link org.apache.accumulo.core.data.Value} pairs to the user's K/V types.
- *
- * Subclasses must implement {@link #nextKeyValue()} and use it to update the following variables:
- * <ul>
- * <li>K {@link #currentK}</li>
- * <li>V {@link #currentV}</li>
- * <li>Key {@link #currentKey} (used for progress reporting)</li>
- * <li>int {@link #numKeysRead} (used for progress reporting)</li>
- * </ul>
- */
- protected abstract static class AbstractRecordReader<K,V> extends RecordReader<K,V> {
- protected long numKeysRead;
- protected Iterator<Map.Entry<Key,Value>> scannerIterator;
- protected RangeInputSplit split;
-
- /**
- * Configures the iterators on a scanner for the given table name.
- *
- * @param context
- * the Hadoop context for the configured job
- * @param scanner
- * the scanner for which to configure the iterators
- * @param tableName
- * the table name for which the scanner is configured
- * @since 1.6.0
- */
- protected abstract void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName, RangeInputSplit split);
-
- /**
- * Initialize a scanner over the given input split using this task attempt configuration.
- */
- @Override
- public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
-
- Scanner scanner;
- split = (RangeInputSplit) inSplit;
- log.debug("Initializing input split: " + split.getRange());
-
- Instance instance = split.getInstance();
- if (null == instance) {
- instance = getInstance(attempt);
- }
-
- String principal = split.getPrincipal();
- if (null == principal) {
- principal = getPrincipal(attempt);
- }
-
- AuthenticationToken token = split.getToken();
- if (null == token) {
- token = getAuthenticationToken(attempt);
- }
-
- Authorizations authorizations = split.getAuths();
- if (null == authorizations) {
- authorizations = getScanAuthorizations(attempt);
- }
-
- String table = split.getTableName();
-
- // in case the table name changed, we can still use the previous name in terms of configuration,
- // but the scanner will use the table id resolved at job setup time
- InputTableConfig tableConfig = getInputTableConfig(attempt, split.getTableName());
-
- Boolean isOffline = split.isOffline();
- if (null == isOffline) {
- isOffline = tableConfig.isOfflineScan();
- }
-
- Boolean isIsolated = split.isIsolatedScan();
- if (null == isIsolated) {
- isIsolated = tableConfig.shouldUseIsolatedScanners();
- }
-
- Boolean usesLocalIterators = split.usesLocalIterators();
- if (null == usesLocalIterators) {
- usesLocalIterators = tableConfig.shouldUseLocalIterators();
- }
-
- List<IteratorSetting> iterators = split.getIterators();
- if (null == iterators) {
- iterators = tableConfig.getIterators();
- }
-
- Collection<Pair<Text,Text>> columns = split.getFetchedColumns();
- if (null == columns) {
- columns = tableConfig.getFetchedColumns();
- }
-
- try {
- log.debug("Creating connector with user: " + principal);
- log.debug("Creating scanner for table: " + table);
- log.debug("Authorizations are: " + authorizations);
- if (isOffline) {
- scanner = new OfflineScanner(instance, new Credentials(principal, token), split.getTableId(), authorizations);
- } else if (instance instanceof MockInstance) {
- scanner = instance.getConnector(principal, token).createScanner(split.getTableName(), authorizations);
- } else {
- scanner = new ScannerImpl(instance, new Credentials(principal, token), split.getTableId(), authorizations);
- }
- if (isIsolated) {
- log.info("Creating isolated scanner");
- scanner = new IsolatedScanner(scanner);
- }
- if (usesLocalIterators) {
- log.info("Using local iterators");
- scanner = new ClientSideIteratorScanner(scanner);
- }
-
- setupIterators(attempt, scanner, split.getTableName(), split);
- } catch (Exception e) {
- throw new IOException(e);
- }
-
- // setup a scanner within the bounds of this split
- for (Pair<Text,Text> c : columns) {
- if (c.getSecond() != null) {
- log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
- scanner.fetchColumn(c.getFirst(), c.getSecond());
- } else {
- log.debug("Fetching column family " + c.getFirst());
- scanner.fetchColumnFamily(c.getFirst());
- }
- }
-
- scanner.setRange(split.getRange());
- numKeysRead = 0;
-
- // do this last after setting all scanner options
- scannerIterator = scanner.iterator();
- }
-
- @Override
- public void close() {}
-
- @Override
- public float getProgress() throws IOException {
- if (numKeysRead > 0 && currentKey == null)
- return 1.0f;
- return split.getProgress(currentKey);
- }
-
- /**
- * The Key that should be returned to the client
- */
- protected K currentK = null;
-
- /**
- * The Value that should be returned to the client
- */
- protected V currentV = null;
-
- /**
- * The Key that is used to determine progress in the current InputSplit. It is not returned to the client and is only used internally
- */
- protected Key currentKey = null;
-
- @Override
- public K getCurrentKey() throws IOException, InterruptedException {
- return currentK;
- }
-
- @Override
- public V getCurrentValue() throws IOException, InterruptedException {
- return currentV;
- }
- }
-
- Map<String,Map<KeyExtent,List<Range>>> binOfflineTable(JobContext context, String tableId, List<Range> ranges) throws TableNotFoundException,
- AccumuloException, AccumuloSecurityException {
-
- Instance instance = getInstance(context);
- Connector conn = instance.getConnector(getPrincipal(context), getAuthenticationToken(context));
-
- return InputConfigurator.binOffline(tableId, ranges, instance, conn);
- }
-
- /**
- * Gets the splits of the tables that have been set on the job.
- *
- * @param context
- * the configuration of the job
- * @return the splits from the tables based on the ranges.
- * @throws java.io.IOException
- * if a table set on the job doesn't exist or an error occurs initializing the tablet locator
- */
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException {
- Level logLevel = getLogLevel(context);
- log.setLevel(logLevel);
- validateOptions(context);
- Random random = new Random();
- LinkedList<InputSplit> splits = new LinkedList<InputSplit>();
- Map<String,InputTableConfig> tableConfigs = getInputTableConfigs(context);
- for (Map.Entry<String,InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
-
- String tableName = tableConfigEntry.getKey();
- InputTableConfig tableConfig = tableConfigEntry.getValue();
-
- Instance instance = getInstance(context);
- boolean mockInstance;
- String tableId;
- // resolve table name to id once, and use id from this point forward
- if (instance instanceof MockInstance) {
- tableId = "";
- mockInstance = true;
- } else {
- try {
- tableId = Tables.getTableId(instance, tableName);
- } catch (TableNotFoundException e) {
- throw new IOException(e);
- }
- mockInstance = false;
- }
-
- Authorizations auths = getScanAuthorizations(context);
- String principal = getPrincipal(context);
- AuthenticationToken token = getAuthenticationToken(context);
-
- boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
- List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
- if (ranges.isEmpty()) {
- ranges = new ArrayList<Range>(1);
- ranges.add(new Range());
- }
-
- // get the metadata information for these ranges
- Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<String,Map<KeyExtent,List<Range>>>();
- TabletLocator tl;
- try {
- if (tableConfig.isOfflineScan()) {
- binnedRanges = binOfflineTable(context, tableId, ranges);
- while (binnedRanges == null) {
- // Some tablets were still online, try again
- UtilWaitThread.sleep(100 + random.nextInt(100)); // sleep randomly between 100 and 200 ms
- binnedRanges = binOfflineTable(context, tableId, ranges);
-
- }
- } else {
- tl = getTabletLocator(context, tableId);
- // it's possible that the cache could contain complete, but old information about a table's tablets... so clear it
- tl.invalidateCache();
- Credentials creds = new Credentials(getPrincipal(context), getAuthenticationToken(context));
-
- while (!tl.binRanges(creds, ranges, binnedRanges).isEmpty()) {
- if (!(instance instanceof MockInstance)) {
- if (!Tables.exists(instance, tableId))
- throw new TableDeletedException(tableId);
- if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
- throw new TableOfflineException(instance, tableId);
- }
- binnedRanges.clear();
- log.warn("Unable to locate bins for specified ranges. Retrying.");
- UtilWaitThread.sleep(100 + random.nextInt(100)); // sleep randomly between 100 and 200 ms
- tl.invalidateCache();
- }
- }
- } catch (Exception e) {
- throw new IOException(e);
- }
-
- HashMap<Range,ArrayList<String>> splitsToAdd = null;
-
- if (!autoAdjust)
- splitsToAdd = new HashMap<Range,ArrayList<String>>();
-
- HashMap<String,String> hostNameCache = new HashMap<String,String>();
- for (Map.Entry<String,Map<KeyExtent,List<Range>>> tserverBin : binnedRanges.entrySet()) {
- String ip = tserverBin.getKey().split(":", 2)[0];
- String location = hostNameCache.get(ip);
- if (location == null) {
- InetAddress inetAddress = InetAddress.getByName(ip);
- location = inetAddress.getCanonicalHostName();
- hostNameCache.put(ip, location);
- }
- for (Map.Entry<KeyExtent,List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
- Range ke = extentRanges.getKey().toDataRange();
- for (Range r : extentRanges.getValue()) {
- if (autoAdjust) {
- // divide ranges into smaller ranges, based on the tablets
- RangeInputSplit split = new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
-
- split.setOffline(tableConfig.isOfflineScan());
- split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
- split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
- split.setMockInstance(mockInstance);
- split.setFetchedColumns(tableConfig.getFetchedColumns());
- split.setPrincipal(principal);
- split.setToken(token);
- split.setInstanceName(instance.getInstanceName());
- split.setZooKeepers(instance.getZooKeepers());
- split.setAuths(auths);
- split.setIterators(tableConfig.getIterators());
- split.setLogLevel(logLevel);
-
- splits.add(split);
- } else {
- // don't divide ranges
- ArrayList<String> locations = splitsToAdd.get(r);
- if (locations == null)
- locations = new ArrayList<String>(1);
- locations.add(location);
- splitsToAdd.put(r, locations);
- }
- }
- }
- }
-
- if (!autoAdjust)
- for (Map.Entry<Range,ArrayList<String>> entry : splitsToAdd.entrySet()) {
- RangeInputSplit split = new RangeInputSplit(tableName, tableId, entry.getKey(), entry.getValue().toArray(new String[0]));
-
- split.setOffline(tableConfig.isOfflineScan());
- split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
- split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
- split.setMockInstance(mockInstance);
- split.setFetchedColumns(tableConfig.getFetchedColumns());
- split.setPrincipal(principal);
- split.setToken(token);
- split.setInstanceName(instance.getInstanceName());
- split.setZooKeepers(instance.getZooKeepers());
- split.setAuths(auths);
- split.setIterators(tableConfig.getIterators());
- split.setLogLevel(logLevel);
-
- splits.add(split);
- }
- }
- return splits;
- }
-
- // use reflection to pull the Configuration out of the JobContext for Hadoop 1 and Hadoop 2 compatibility
- static Configuration getConfiguration(JobContext context) {
- try {
- Class<?> c = AbstractInputFormat.class.getClassLoader().loadClass("org.apache.hadoop.mapreduce.JobContext");
- Method m = c.getMethod("getConfiguration");
- Object o = m.invoke(context, new Object[0]);
- return (Configuration) o;
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-}
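The AbstractRecordReader Javadoc above describes the subclassing contract: implement nextKeyValue() and update currentK, currentV, currentKey, and numKeysRead. A sketch of that contract follows, assuming the companion mapreduce InputFormatBase and its RecordReaderBase (which mirror the mapred classes shown earlier and leave only nextKeyValue() abstract); RowOnlyInputFormat is a hypothetical name.

    import java.util.Map;

    import org.apache.accumulo.core.client.mapreduce.InputFormatBase;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Value;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    public class RowOnlyInputFormat extends InputFormatBase<Text,Text> {
      @Override
      public RecordReader<Text,Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
        return new RecordReaderBase<Text,Text>() {
          @Override
          public boolean nextKeyValue() {
            if (scannerIterator.hasNext()) {
              ++numKeysRead; // drives progress reporting
              Map.Entry<Key,Value> entry = scannerIterator.next();
              currentKey = entry.getKey(); // internal progress key
              currentK = currentKey.getRow(); // expose only the row to the mapper
              currentV = new Text(entry.getValue().get());
              return true;
            }
            return false;
          }
        };
      }
    }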
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
deleted file mode 100644
index 196fb04..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
-import org.apache.accumulo.core.conf.Property;
-import org.apache.accumulo.core.data.ArrayByteSequence;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.file.FileOperations;
-import org.apache.accumulo.core.file.FileSKVWriter;
-import org.apache.accumulo.core.security.ColumnVisibility;
-import org.apache.commons.collections.map.LRUMap;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.log4j.Logger;
-
-/**
- * This class allows MapReduce jobs to write output in the Accumulo data file format.<br />
- * Care should be taken to write only sorted data (sorted by {@link Key}), as this is an important requirement of Accumulo data files.
- *
- * <p>
- * The output path to be created must be specified via {@link AccumuloFileOutputFormat#setOutputPath(Job, Path)}. This is inherited from
- * {@link FileOutputFormat#setOutputPath(Job, Path)}. Other methods from {@link FileOutputFormat} are not supported and may be ignored or cause failures. Using
- * other Hadoop configuration options that affect the behavior of the underlying files directly in the Job's configuration may work, but are not directly
- * supported at this time.
- */
-public class AccumuloFileOutputFormat extends FileOutputFormat<Key,Value> {
-
- private static final Class<?> CLASS = AccumuloFileOutputFormat.class;
- protected static final Logger log = Logger.getLogger(CLASS);
-
- /**
- * This helper method provides an AccumuloConfiguration object constructed from the Accumulo defaults, and overridden with Accumulo properties that have been
- * stored in the Job's configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @since 1.5.0
- */
- protected static AccumuloConfiguration getAccumuloConfiguration(JobContext context) {
- return FileOutputConfigurator.getAccumuloConfiguration(CLASS, InputFormatBase.getConfiguration(context));
- }
-
- /**
- * Sets the compression type to use for data blocks. Specifying a compression may require additional libraries to be available to your Job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param compressionType
- * one of "none", "gz", "lzo", or "snappy"
- * @since 1.5.0
- */
- public static void setCompressionType(Job job, String compressionType) {
- FileOutputConfigurator.setCompressionType(CLASS, job.getConfiguration(), compressionType);
- }
-
- /**
- * Sets the size for data blocks within each file.<br />
- * Data blocks are a span of key/value pairs stored in the file that are compressed and indexed as a group.
- *
- * <p>
- * Making this value smaller may increase seek performance, but at the cost of increasing the size of the indexes (which can also affect seek performance).
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param dataBlockSize
- * the block size, in bytes
- * @since 1.5.0
- */
- public static void setDataBlockSize(Job job, long dataBlockSize) {
- FileOutputConfigurator.setDataBlockSize(CLASS, job.getConfiguration(), dataBlockSize);
- }
-
- /**
- * Sets the size for file blocks in the file system; file blocks are managed, and replicated, by the underlying file system.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param fileBlockSize
- * the block size, in bytes
- * @since 1.5.0
- */
- public static void setFileBlockSize(Job job, long fileBlockSize) {
- FileOutputConfigurator.setFileBlockSize(CLASS, job.getConfiguration(), fileBlockSize);
- }
-
- /**
- * Sets the size for index blocks within each file; smaller blocks mean a deeper index hierarchy within the file, while larger blocks mean a shallower
- * index hierarchy within the file. This can affect the performance of queries.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param indexBlockSize
- * the block size, in bytes
- * @since 1.5.0
- */
- public static void setIndexBlockSize(Job job, long indexBlockSize) {
- FileOutputConfigurator.setIndexBlockSize(CLASS, job.getConfiguration(), indexBlockSize);
- }
-
- /**
- * Sets the file system replication factor for the resulting file, overriding the file system default.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param replication
- * the number of replicas for produced files
- * @since 1.5.0
- */
- public static void setReplication(Job job, int replication) {
- FileOutputConfigurator.setReplication(CLASS, job.getConfiguration(), replication);
- }
-
- @Override
- public RecordWriter<Key,Value> getRecordWriter(TaskAttemptContext context) throws IOException {
- // get the path of the temporary output file
- final Configuration conf = InputFormatBase.getConfiguration(context);
- final AccumuloConfiguration acuConf = getAccumuloConfiguration(context);
-
- final String extension = acuConf.get(Property.TABLE_FILE_TYPE);
- final Path file = this.getDefaultWorkFile(context, "." + extension);
-
- final LRUMap validVisibilities = new LRUMap(1000);
-
- return new RecordWriter<Key,Value>() {
- FileSKVWriter out = null;
-
- @Override
- public void close(TaskAttemptContext context) throws IOException {
- if (out != null)
- out.close();
- }
-
- @Override
- public void write(Key key, Value value) throws IOException {
-
- Boolean wasChecked = (Boolean) validVisibilities.get(key.getColumnVisibilityData());
- if (wasChecked == null) {
- byte[] cv = key.getColumnVisibilityData().toArray();
- new ColumnVisibility(cv);
- validVisibilities.put(new ArrayByteSequence(Arrays.copyOf(cv, cv.length)), Boolean.TRUE);
- }
-
- if (out == null) {
- out = FileOperations.getInstance().openWriter(file.toString(), file.getFileSystem(conf), conf, acuConf);
- out.startDefaultLocalityGroup();
- }
- out.append(key, value);
- }
- };
- }
-
-}
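For reference, a minimal sketch of wiring the output format documented above into a new-API job, using only the setters shown in its Javadoc plus the standard Job output-class hooks; the output path is a placeholder.

    import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Value;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;

    public class FileOutputSketch {
      static void configureFileOutput(Job job) {
        job.setOutputFormatClass(AccumuloFileOutputFormat.class);
        // reducers must emit Key/Value pairs already sorted by Key
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);
        // directory for the generated Accumulo data files (placeholder path)
        AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp/bulk-output"));
        // optional tuning from the methods documented above
        AccumuloFileOutputFormat.setCompressionType(job, "gz");
        AccumuloFileOutputFormat.setReplication(job, 3);
      }
    }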
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
deleted file mode 100644
index 21a0280..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.IOException;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.format.DefaultFormatter;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.log4j.Level;
-
-/**
- * This class allows MapReduce jobs to use Accumulo as the source of data. This {@link InputFormat} provides keys and values of type {@link Key} and
- * {@link Value} to the Map function.
- *
- * The user must specify the following via static configurator methods:
- *
- * <ul>
- * <li>{@link AccumuloInputFormat#setConnectorInfo(Job, String, AuthenticationToken)}
- * <li>{@link AccumuloInputFormat#setScanAuthorizations(Job, Authorizations)}
- * <li>{@link AccumuloInputFormat#setZooKeeperInstance(Job, ClientConfiguration)} OR {@link AccumuloInputFormat#setMockInstance(Job, String)}
- * </ul>
- *
- * Other static methods are optional.
- */
-public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
-
- @Override
- public RecordReader<Key,Value> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
- log.setLevel(getLogLevel(context));
-
- // Override the log level from the configuration; if the RangeInputSplit has one, it's the more correct one to use.
- if (split instanceof org.apache.accumulo.core.client.mapreduce.RangeInputSplit) {
- org.apache.accumulo.core.client.mapreduce.RangeInputSplit risplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) split;
- Level level = risplit.getLogLevel();
- if (null != level) {
- log.setLevel(level);
- }
- }
-
- return new RecordReaderBase<Key,Value>() {
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- if (scannerIterator.hasNext()) {
- ++numKeysRead;
- Entry<Key,Value> entry = scannerIterator.next();
- currentK = currentKey = entry.getKey();
- currentV = entry.getValue();
- if (log.isTraceEnabled())
- log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
- return true;
- }
- return false;
- }
- };
- }
-}
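A minimal sketch of the required configurator calls listed in the class Javadoc above, using the new-API Job. The principal, token, instance, and table names are placeholders, and setInputTableName is assumed to come from the companion mapreduce InputFormatBase.

    import org.apache.accumulo.core.client.AccumuloSecurityException;
    import org.apache.accumulo.core.client.ClientConfiguration;
    import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
    import org.apache.accumulo.core.client.security.tokens.PasswordToken;
    import org.apache.accumulo.core.security.Authorizations;
    import org.apache.hadoop.mapreduce.Job;

    public class InputFormatSetupSketch {
      static void configureInput(Job job) throws AccumuloSecurityException {
        job.setInputFormatClass(AccumuloInputFormat.class);
        // required: connector info, instance, and scan authorizations
        AccumuloInputFormat.setConnectorInfo(job, "reader", new PasswordToken("secret"));
        AccumuloInputFormat.setZooKeeperInstance(job,
            ClientConfiguration.loadDefault().withInstance("myinstance").withZkHosts("zk1:2181"));
        AccumuloInputFormat.setScanAuthorizations(job, new Authorizations("public"));
        // the table to scan
        AccumuloInputFormat.setInputTableName(job, "mytable");
      }
    }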
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
deleted file mode 100644
index af1001f..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.format.DefaultFormatter;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * This class allows MapReduce jobs to use multiple Accumulo tables as the source of data. This {@link org.apache.hadoop.mapreduce.InputFormat} provides keys
- * and values of type {@link Key} and {@link Value} to the Map function.
- *
- * The user must specify the following via static configurator methods:
- *
- * <ul>
- * <li>{@link AccumuloMultiTableInputFormat#setConnectorInfo(Job, String, AuthenticationToken)}
- * <li>{@link AccumuloMultiTableInputFormat#setScanAuthorizations(Job, Authorizations)}
- * <li>{@link AccumuloMultiTableInputFormat#setZooKeeperInstance(Job, ClientConfiguration)} OR {@link AccumuloInputFormat#setMockInstance(Job, String)}
- * <li>{@link AccumuloMultiTableInputFormat#setInputTableConfigs(Job, Map)}
- * </ul>
- *
- * Other static methods are optional.
- */
-public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value> {
-
- /**
- * Sets the {@link InputTableConfig} objects on the given Hadoop configuration
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param configs
- * the table query configs to be set on the configuration.
- * @since 1.6.0
- */
- public static void setInputTableConfigs(Job job, Map<String,InputTableConfig> configs) {
- checkNotNull(configs);
- InputConfigurator.setInputTableConfigs(CLASS, getConfiguration(job), configs);
- }
-
- @Override
- public RecordReader<Key,Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
- log.setLevel(getLogLevel(context));
- return new AbstractRecordReader<Key,Value>() {
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- if (scannerIterator.hasNext()) {
- ++numKeysRead;
- Map.Entry<Key,Value> entry = scannerIterator.next();
- currentK = currentKey = entry.getKey();
- currentV = entry.getValue();
- if (log.isTraceEnabled())
- log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
- return true;
- }
- return false;
- }
-
- @Override
- protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName, RangeInputSplit split) {
- List<IteratorSetting> iterators = split.getIterators();
- if (null == iterators) {
- iterators = getInputTableConfig(context, tableName).getIterators();
- }
-
- for (IteratorSetting setting : iterators) {
- scanner.addScanIterator(setting);
- }
- }
- };
- }
-}
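A sketch of per-table configuration with setInputTableConfigs, as documented above. It assumes InputTableConfig has a no-arg constructor and a setRanges setter mirroring the getRanges accessor used by AbstractInputFormat; the table names and credentials are placeholders.

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.accumulo.core.client.AccumuloSecurityException;
    import org.apache.accumulo.core.client.ClientConfiguration;
    import org.apache.accumulo.core.client.mapreduce.AccumuloMultiTableInputFormat;
    import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
    import org.apache.accumulo.core.client.security.tokens.PasswordToken;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.security.Authorizations;
    import org.apache.hadoop.mapreduce.Job;

    public class MultiTableSetupSketch {
      static void configure(Job job) throws AccumuloSecurityException {
        job.setInputFormatClass(AccumuloMultiTableInputFormat.class);
        AccumuloMultiTableInputFormat.setConnectorInfo(job, "reader", new PasswordToken("secret"));
        AccumuloMultiTableInputFormat.setZooKeeperInstance(job, ClientConfiguration.loadDefault());
        AccumuloMultiTableInputFormat.setScanAuthorizations(job, new Authorizations());

        Map<String,InputTableConfig> configs = new HashMap<String,InputTableConfig>();
        // a default config scans the whole table
        configs.put("events", new InputTableConfig());
        InputTableConfig users = new InputTableConfig();
        // assumed setter; restricts the second table to a single row range
        users.setRanges(Collections.singletonList(new Range("a", "m")));
        configs.put("users", users);
        AccumuloMultiTableInputFormat.setInputTableConfigs(job, configs);
      }
    }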
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormat.java
deleted file mode 100644
index af9bbae..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormat.java
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Instance;
-import org.apache.accumulo.core.client.MultiTableBatchWriter;
-import org.apache.accumulo.core.client.MutationsRejectedException;
-import org.apache.accumulo.core.client.TableExistsException;
-import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.client.ZooKeeperInstance;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.OutputConfigurator;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.SecurityErrorCode;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
-import org.apache.accumulo.core.data.ColumnUpdate;
-import org.apache.accumulo.core.data.KeyExtent;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.security.ColumnVisibility;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-
-/**
- * This class allows MapReduce jobs to use Accumulo as the sink for data. This {@link OutputFormat} accepts keys and values of type {@link Text} (for a table
- * name) and {@link Mutation} from the Map and Reduce functions.
- *
- * The user must specify the following via static configurator methods:
- *
- * <ul>
- * <li>{@link AccumuloOutputFormat#setConnectorInfo(Job, String, AuthenticationToken)}
- * <li>{@link AccumuloOutputFormat#setConnectorInfo(Job, String, String)}
- * <li>{@link AccumuloOutputFormat#setZooKeeperInstance(Job, ClientConfiguration)} OR {@link AccumuloOutputFormat#setMockInstance(Job, String)}
- * </ul>
- *
- * Other static methods are optional.
- */
-public class AccumuloOutputFormat extends OutputFormat<Text,Mutation> {
-
- private static final Class<?> CLASS = AccumuloOutputFormat.class;
- protected static final Logger log = Logger.getLogger(CLASS);
-
- /**
- * Sets the connector information needed to communicate with Accumulo in this job.
- *
- * <p>
- * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe
- * conversion to a string, and is not intended to be secure.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param principal
- * a valid Accumulo user name (user must have Table.CREATE permission if {@link #setCreateTables(Job, boolean)} is set to true)
- * @param token
- * the user's password
- * @since 1.5.0
- */
- public static void setConnectorInfo(Job job, String principal, AuthenticationToken token) throws AccumuloSecurityException {
- OutputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
- }
-
- /**
- * Sets the connector information needed to communicate with Accumulo in this job.
- *
- * <p>
- * Stores the password in a file in HDFS and pulls that into the Distributed Cache in an attempt to be more secure than storing it in the Configuration.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param principal
- * a valid Accumulo user name (user must have Table.CREATE permission if {@link #setCreateTables(Job, boolean)} is set to true)
- * @param tokenFile
- * the path to the token file
- * @since 1.6.0
- */
- public static void setConnectorInfo(Job job, String principal, String tokenFile) throws AccumuloSecurityException {
- OutputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, tokenFile);
- }
-
- /**
- * Determines if the connector has been configured.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return true if the connector has been configured, false otherwise
- * @since 1.5.0
- * @see #setConnectorInfo(Job, String, AuthenticationToken)
- */
- protected static Boolean isConnectorInfoSet(JobContext context) {
- return OutputConfigurator.isConnectorInfoSet(CLASS, InputFormatBase.getConfiguration(context));
- }
-
- /**
- * Gets the user name from the configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the user name
- * @since 1.5.0
- * @see #setConnectorInfo(Job, String, AuthenticationToken)
- */
- protected static String getPrincipal(JobContext context) {
- return OutputConfigurator.getPrincipal(CLASS, InputFormatBase.getConfiguration(context));
- }
-
- /**
- * Gets the serialized token class from either the configuration or the token file.
- *
- * @since 1.5.0
- * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobContext)} instead.
- */
- @Deprecated
- protected static String getTokenClass(JobContext context) {
- return getAuthenticationToken(context).getClass().getName();
- }
-
- /**
- * Gets the serialized token from either the configuration or the token file.
- *
- * @since 1.5.0
- * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobContext)} instead.
- */
- @Deprecated
- protected static byte[] getToken(JobContext context) {
- return AuthenticationTokenSerializer.serialize(getAuthenticationToken(context));
- }
-
- /**
- * Gets the authentication token from either the specified token file or directly from the configuration, whichever was used when the job was configured.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the principal's authentication token
- * @since 1.6.0
- * @see #setConnectorInfo(Job, String, AuthenticationToken)
- * @see #setConnectorInfo(Job, String, String)
- */
- protected static AuthenticationToken getAuthenticationToken(JobContext context) {
- return OutputConfigurator.getAuthenticationToken(CLASS, InputFormatBase.getConfiguration(context));
- }
-
- /**
- * Configures a {@link ZooKeeperInstance} for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param clientConfig
- * client configuration for specifying connection timeouts, SSL connection options, etc.
- * @since 1.6.0
- */
- public static void setZooKeeperInstance(Job job, ClientConfiguration clientConfig) {
- OutputConfigurator.setZooKeeperInstance(CLASS, job.getConfiguration(), clientConfig);
- }
-
- /**
- * Configures a {@link MockInstance} for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param instanceName
- * the Accumulo instance name
- * @since 1.5.0
- */
- public static void setMockInstance(Job job, String instanceName) {
- OutputConfigurator.setMockInstance(CLASS, job.getConfiguration(), instanceName);
- }
-
- /**
- * Initializes an Accumulo {@link Instance} based on the configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return an Accumulo instance
- * @since 1.5.0
- * @see #setZooKeeperInstance(Job, ClientConfiguration)
- * @see #setMockInstance(Job, String)
- */
- protected static Instance getInstance(JobContext context) {
- return OutputConfigurator.getInstance(CLASS, InputFormatBase.getConfiguration(context));
- }
-
- /**
- * Sets the log level for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param level
- * the logging level
- * @since 1.5.0
- */
- public static void setLogLevel(Job job, Level level) {
- OutputConfigurator.setLogLevel(CLASS, job.getConfiguration(), level);
- }
-
- /**
- * Gets the log level from this configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the log level
- * @since 1.5.0
- * @see #setLogLevel(Job, Level)
- */
- protected static Level getLogLevel(JobContext context) {
- return OutputConfigurator.getLogLevel(CLASS, InputFormatBase.getConfiguration(context));
- }
-
- /**
- * Sets the default table name to use if one emits a null in place of a table name for a given mutation. Table names can only contain alphanumeric
- * characters and underscores.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param tableName
- * the table to use when the tablename is null in the write call
- * @since 1.5.0
- */
- public static void setDefaultTableName(Job job, String tableName) {
- OutputConfigurator.setDefaultTableName(CLASS, job.getConfiguration(), tableName);
- }
-
- /**
- * Gets the default table name from the configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the default table name
- * @since 1.5.0
- * @see #setDefaultTableName(Job, String)
- */
- protected static String getDefaultTableName(JobContext context) {
- return OutputConfigurator.getDefaultTableName(CLASS, InputFormatBase.getConfiguration(context));
- }
-
- /**
- * Sets the configuration for the job's {@link BatchWriter} instances. If not set, a new {@link BatchWriterConfig} with sensible built-in defaults is
- * used. Setting the configuration multiple times overwrites any previous configuration.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param bwConfig
- * the configuration for the {@link BatchWriter}
- * @since 1.5.0
- */
- public static void setBatchWriterOptions(Job job, BatchWriterConfig bwConfig) {
- OutputConfigurator.setBatchWriterOptions(CLASS, job.getConfiguration(), bwConfig);
- }
-
- /**
- * Gets the {@link BatchWriterConfig} settings.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the configuration object
- * @since 1.5.0
- * @see #setBatchWriterOptions(Job, BatchWriterConfig)
- */
- protected static BatchWriterConfig getBatchWriterOptions(JobContext context) {
- return OutputConfigurator.getBatchWriterOptions(CLASS, InputFormatBase.getConfiguration(context));
- }
-
- /**
- * Sets the directive to create new tables, as necessary. Table names can only contain alphanumeric characters and underscores.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.5.0
- */
- public static void setCreateTables(Job job, boolean enableFeature) {
- OutputConfigurator.setCreateTables(CLASS, job.getConfiguration(), enableFeature);
- }
-
- /**
- * Determines whether tables are permitted to be created as needed.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return true if the feature is enabled, false otherwise
- * @since 1.5.0
- * @see #setCreateTables(Job, boolean)
- */
- protected static Boolean canCreateTables(JobContext context) {
- return OutputConfigurator.canCreateTables(CLASS, InputFormatBase.getConfiguration(context));
- }
-
- /**
- * Sets the directive to use simulation mode for this job. In simulation mode, no output is produced. This is useful for testing.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.5.0
- */
- public static void setSimulationMode(Job job, boolean enableFeature) {
- OutputConfigurator.setSimulationMode(CLASS, job.getConfiguration(), enableFeature);
- }
-
- /**
- * Determines whether this feature is enabled.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return true if the feature is enabled, false otherwise
- * @since 1.5.0
- * @see #setSimulationMode(Job, boolean)
- */
- protected static Boolean getSimulationMode(JobContext context) {
- return OutputConfigurator.getSimulationMode(CLASS, InputFormatBase.getConfiguration(context));
- }
-
- /**
- * A base class to be used to create {@link RecordWriter} instances that write to Accumulo.
- */
- protected static class AccumuloRecordWriter extends RecordWriter<Text,Mutation> {
- private MultiTableBatchWriter mtbw = null;
- private HashMap<Text,BatchWriter> bws = null;
- private Text defaultTableName = null;
-
- private boolean simulate = false;
- private boolean createTables = false;
-
- private long mutCount = 0;
- private long valCount = 0;
-
- private Connector conn;
-
- protected AccumuloRecordWriter(TaskAttemptContext context) throws AccumuloException, AccumuloSecurityException, IOException {
- Level l = getLogLevel(context);
- if (l != null)
- log.setLevel(getLogLevel(context));
- this.simulate = getSimulationMode(context);
- this.createTables = canCreateTables(context);
-
- if (simulate)
- log.info("Simulating output only. No writes to tables will occur");
-
- this.bws = new HashMap<Text,BatchWriter>();
-
- String tname = getDefaultTableName(context);
- this.defaultTableName = (tname == null) ? null : new Text(tname);
-
- if (!simulate) {
- this.conn = getInstance(context).getConnector(getPrincipal(context), getAuthenticationToken(context));
- mtbw = conn.createMultiTableBatchWriter(getBatchWriterOptions(context));
- }
- }
-
- /**
- * Push a mutation into a table. If table is null, the defaultTable will be used. If canCreateTable is set, the table will be created if it does not exist.
- * The table name must only contain alphanumerics and underscore.
- */
- @Override
- public void write(Text table, Mutation mutation) throws IOException {
- if (table == null || table.toString().isEmpty())
- table = this.defaultTableName;
-
- if (!simulate && table == null)
- throw new IOException("No table or default table specified. Try simulation mode next time");
-
- ++mutCount;
- valCount += mutation.size();
- printMutation(table, mutation);
-
- if (simulate)
- return;
-
- if (!bws.containsKey(table))
- try {
- addTable(table);
- } catch (Exception e) {
- e.printStackTrace();
- throw new IOException(e);
- }
-
- try {
- bws.get(table).addMutation(mutation);
- } catch (MutationsRejectedException e) {
- throw new IOException(e);
- }
- }
-
- public void addTable(Text tableName) throws AccumuloException, AccumuloSecurityException {
- if (simulate) {
- log.info("Simulating adding table: " + tableName);
- return;
- }
-
- log.debug("Adding table: " + tableName);
- BatchWriter bw = null;
- String table = tableName.toString();
-
- if (createTables && !conn.tableOperations().exists(table)) {
- try {
- conn.tableOperations().create(table);
- } catch (AccumuloSecurityException e) {
- log.error("Accumulo security violation creating " + table, e);
- throw e;
- } catch (TableExistsException e) {
- // Shouldn't happen
- }
- }
-
- try {
- bw = mtbw.getBatchWriter(table);
- } catch (TableNotFoundException e) {
- log.error("Accumulo table " + table + " doesn't exist and cannot be created.", e);
- throw new AccumuloException(e);
- } catch (AccumuloException e) {
- throw e;
- } catch (AccumuloSecurityException e) {
- throw e;
- }
-
- if (bw != null)
- bws.put(tableName, bw);
- }
-
- private int printMutation(Text table, Mutation m) {
- if (log.isTraceEnabled()) {
- log.trace(String.format("Table %s row key: %s", table, hexDump(m.getRow())));
- for (ColumnUpdate cu : m.getUpdates()) {
- log.trace(String.format("Table %s column: %s:%s", table, hexDump(cu.getColumnFamily()), hexDump(cu.getColumnQualifier())));
- log.trace(String.format("Table %s security: %s", table, new ColumnVisibility(cu.getColumnVisibility()).toString()));
- log.trace(String.format("Table %s value: %s", table, hexDump(cu.getValue())));
- }
- }
- return m.getUpdates().size();
- }
-
- private String hexDump(byte[] ba) {
- StringBuilder sb = new StringBuilder();
- for (byte b : ba) {
- if ((b > 0x20) && (b < 0x7e))
- sb.append((char) b);
- else
- sb.append(String.format("x%02x", b));
- }
- return sb.toString();
- }
-
- @Override
- public void close(TaskAttemptContext attempt) throws IOException, InterruptedException {
- log.debug("mutations written: " + mutCount + ", values written: " + valCount);
- if (simulate)
- return;
-
- try {
- mtbw.close();
- } catch (MutationsRejectedException e) {
- if (e.getAuthorizationFailuresMap().size() > 0) {
- HashMap<String,Set<SecurityErrorCode>> tables = new HashMap<String,Set<SecurityErrorCode>>();
- for (Entry<KeyExtent,Set<SecurityErrorCode>> ke : e.getAuthorizationFailuresMap().entrySet()) {
- Set<SecurityErrorCode> secCodes = tables.get(ke.getKey().getTableId().toString());
- if (secCodes == null) {
- secCodes = new HashSet<SecurityErrorCode>();
- tables.put(ke.getKey().getTableId().toString(), secCodes);
- }
- secCodes.addAll(ke.getValue());
- }
-
- log.error("Not authorized to write to tables : " + tables);
- }
-
- if (e.getConstraintViolationSummaries().size() > 0) {
- log.error("Constraint violations : " + e.getConstraintViolationSummaries().size());
- }
- }
- }
- }
-
- @Override
- public void checkOutputSpecs(JobContext job) throws IOException {
- if (!isConnectorInfoSet(job))
- throw new IOException("Connector info has not been set.");
- try {
- // if the instance isn't configured, it will complain here
- String principal = getPrincipal(job);
- AuthenticationToken token = getAuthenticationToken(job);
- Connector c = getInstance(job).getConnector(principal, token);
- if (!c.securityOperations().authenticateUser(principal, token))
- throw new IOException("Unable to authenticate user");
- } catch (AccumuloException e) {
- throw new IOException(e);
- } catch (AccumuloSecurityException e) {
- throw new IOException(e);
- }
- }
-
- @Override
- public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
- return new NullOutputFormat<Text,Mutation>().getOutputCommitter(context);
- }
-
- @Override
- public RecordWriter<Text,Mutation> getRecordWriter(TaskAttemptContext attempt) throws IOException {
- try {
- return new AccumuloRecordWriter(attempt);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
-}
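For orientation, a minimal driver sketch wiring up the configurators documented above might look like the following; the instance name, ZooKeeper hosts, credentials, and table name are placeholders rather than values taken from this change, and the surrounding Tool/Configured boilerplate is omitted:

    Job job = new Job(conf, "write-to-accumulo");
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Text.class);        // per-record table name; null/empty falls back to the default table
    job.setOutputValueClass(Mutation.class);

    AccumuloOutputFormat.setConnectorInfo(job, "someUser", new PasswordToken("somePass"));
    AccumuloOutputFormat.setZooKeeperInstance(job,
        ClientConfiguration.loadDefault().withInstance("someInstance").withZkHosts("zkhost1:2181,zkhost2:2181"));
    AccumuloOutputFormat.setDefaultTableName(job, "some_table");
    AccumuloOutputFormat.setCreateTables(job, true);    // principal needs Table.CREATE, per the javadoc above

    BatchWriterConfig bwConfig = new BatchWriterConfig();
    bwConfig.setMaxMemory(50 * 1024 * 1024);            // buffer up to 50 MB of mutations before flushing
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);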
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormat.java
deleted file mode 100644
index 37caf15..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormat.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.IOException;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.RowIterator;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.PeekingIterator;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * This class allows MapReduce jobs to use Accumulo as the source of data. This {@link InputFormat} provides row names as {@link Text} as keys, and a
- * corresponding {@link PeekingIterator} as a value, which in turn makes the {@link Key}/{@link Value} pairs for that row available to the Map function.
- *
- * The user must specify the following via static configurator methods:
- *
- * <ul>
- * <li>{@link AccumuloRowInputFormat#setConnectorInfo(Job, String, AuthenticationToken)}
- * <li>{@link AccumuloRowInputFormat#setInputTableName(Job, String)}
- * <li>{@link AccumuloRowInputFormat#setScanAuthorizations(Job, Authorizations)}
- * <li>{@link AccumuloRowInputFormat#setZooKeeperInstance(Job, ClientConfiguration)} OR {@link AccumuloRowInputFormat#setMockInstance(Job, String)}
- * </ul>
- *
- * Other static methods are optional.
- */
-public class AccumuloRowInputFormat extends InputFormatBase<Text,PeekingIterator<Entry<Key,Value>>> {
- @Override
- public RecordReader<Text,PeekingIterator<Entry<Key,Value>>> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException,
- InterruptedException {
- log.setLevel(getLogLevel(context));
- return new RecordReaderBase<Text,PeekingIterator<Entry<Key,Value>>>() {
- RowIterator rowIterator;
-
- @Override
- public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
- super.initialize(inSplit, attempt);
- rowIterator = new RowIterator(scannerIterator);
- currentK = new Text();
- currentV = null;
- }
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- if (!rowIterator.hasNext())
- return false;
- currentV = new PeekingIterator<Entry<Key,Value>>(rowIterator.next());
- numKeysRead = rowIterator.getKVCount();
- currentKey = currentV.peek().getKey();
- currentK = new Text(currentKey.getRow());
- return true;
- }
- };
- }
-}
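For orientation, a sketch of a mapper consuming this row-grouped input; the class name, output types, and counting logic are illustrative only and not part of this change:

    // Assumes the usual driver-side setup on the job: setConnectorInfo, setInputTableName,
    // setScanAuthorizations, and setZooKeeperInstance or setMockInstance.
    public static class RowCellCounter extends Mapper<Text,PeekingIterator<Entry<Key,Value>>,Text,LongWritable> {
      @Override
      protected void map(Text row, PeekingIterator<Entry<Key,Value>> columns, Context context)
          throws IOException, InterruptedException {
        long cells = 0;
        while (columns.hasNext()) {
          columns.next();                              // each Key/Value pair of the current row, in sorted order
          cells++;
        }
        context.write(row, new LongWritable(cells));   // emit the per-row cell count
      }
    }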
[03/12] ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormatTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormatTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormatTest.java
new file mode 100644
index 0000000..2864016
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloMultiTableInputFormatTest.java
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Test;
+
+public class AccumuloMultiTableInputFormatTest {
+
+ private static final String PREFIX = AccumuloMultiTableInputFormatTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
+ private static final String TEST_TABLE_1 = PREFIX + "_mapred_table_1";
+ private static final String TEST_TABLE_2 = PREFIX + "_mapred_table_2";
+
+ private static AssertionError e1 = null;
+ private static AssertionError e2 = null;
+
+ private static class MRTester extends Configured implements Tool {
+ private static class TestMapper implements Mapper<Key,Value,Key,Value> {
+ Key key = null;
+ int count = 0;
+
+ @Override
+ public void map(Key k, Value v, OutputCollector<Key,Value> output, Reporter reporter) throws IOException {
+ try {
+ String tableName = ((RangeInputSplit) reporter.getInputSplit()).getTableName();
+ if (key != null)
+ assertEquals(key.getRow().toString(), new String(v.get()));
+ assertEquals(new Text(String.format("%s_%09x", tableName, count + 1)), k.getRow());
+ assertEquals(String.format("%s_%09x", tableName, count), new String(v.get()));
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ key = new Key(k);
+ count++;
+ }
+
+ @Override
+ public void configure(JobConf job) {}
+
+ @Override
+ public void close() throws IOException {
+ try {
+ assertEquals(100, count);
+ } catch (AssertionError e) {
+ e2 = e;
+ }
+ }
+
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 4) {
+ throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table1> <table2>");
+ }
+
+ String user = args[0];
+ String pass = args[1];
+ String table1 = args[2];
+ String table2 = args[3];
+
+ JobConf job = new JobConf(getConf());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormat(AccumuloInputFormat.class);
+
+ AccumuloMultiTableInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloMultiTableInputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ InputTableConfig tableConfig1 = new InputTableConfig();
+ InputTableConfig tableConfig2 = new InputTableConfig();
+
+ Map<String,InputTableConfig> configMap = new HashMap<String,InputTableConfig>();
+ configMap.put(table1, tableConfig1);
+ configMap.put(table2, tableConfig2);
+
+ AccumuloMultiTableInputFormat.setInputTableConfigs(job, configMap);
+
+ job.setMapperClass(TestMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormat(NullOutputFormat.class);
+
+ job.setNumReduceTasks(0);
+
+ return JobClient.runJob(job).isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
+ }
+ }
+
+ @Test
+ public void testMap() throws Exception {
+ MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
+ Connector c = mockInstance.getConnector("root", new PasswordToken(""));
+ c.tableOperations().create(TEST_TABLE_1);
+ c.tableOperations().create(TEST_TABLE_2);
+ BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
+ BatchWriter bw2 = c.createBatchWriter(TEST_TABLE_2, new BatchWriterConfig());
+ for (int i = 0; i < 100; i++) {
+ Mutation t1m = new Mutation(new Text(String.format("%s_%09x", TEST_TABLE_1, i + 1)));
+ t1m.put(new Text(), new Text(), new Value(String.format("%s_%09x", TEST_TABLE_1, i).getBytes()));
+ bw.addMutation(t1m);
+ Mutation t2m = new Mutation(new Text(String.format("%s_%09x", TEST_TABLE_2, i + 1)));
+ t2m.put(new Text(), new Text(), new Value(String.format("%s_%09x", TEST_TABLE_2, i).getBytes()));
+ bw2.addMutation(t2m);
+ }
+ bw.close();
+ bw2.close();
+
+ MRTester.main(new String[] {"root", "", TEST_TABLE_1, TEST_TABLE_2});
+ assertNull(e1);
+ assertNull(e2);
+ }
+
+ /**
+ * Verify {@link org.apache.accumulo.core.client.mapreduce.InputTableConfig} objects get correctly serialized in the JobContext.
+ */
+ @Test
+ public void testTableQueryConfigSerialization() throws IOException {
+
+ JobConf job = new JobConf();
+
+ InputTableConfig table1 = new InputTableConfig().setRanges(Collections.singletonList(new Range("a", "b")))
+ .fetchColumns(Collections.singleton(new Pair<Text,Text>(new Text("CF1"), new Text("CQ1"))))
+ .setIterators(Collections.singletonList(new IteratorSetting(50, "iter1", "iterclass1")));
+
+ InputTableConfig table2 = new InputTableConfig().setRanges(Collections.singletonList(new Range("a", "b")))
+ .fetchColumns(Collections.singleton(new Pair<Text,Text>(new Text("CF1"), new Text("CQ1"))))
+ .setIterators(Collections.singletonList(new IteratorSetting(50, "iter1", "iterclass1")));
+
+ Map<String,InputTableConfig> configMap = new HashMap<String,InputTableConfig>();
+ configMap.put(TEST_TABLE_1, table1);
+ configMap.put(TEST_TABLE_2, table2);
+ AccumuloMultiTableInputFormat.setInputTableConfigs(job, configMap);
+
+ assertEquals(table1, AccumuloMultiTableInputFormat.getInputTableConfig(job, TEST_TABLE_1));
+ assertEquals(table2, AccumuloMultiTableInputFormat.getInputTableConfig(job, TEST_TABLE_2));
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormatTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormatTest.java
new file mode 100644
index 0000000..36054c8
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormatTest.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map.Entry;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Test;
+
+/**
+ * Tests the mapred {@link AccumuloOutputFormat}: batch writer settings round-trip through the job configuration, and a simple mock-instance job reads one table and writes a summary mutation to a second.
+ */
+public class AccumuloOutputFormatTest {
+ private static AssertionError e1 = null;
+ private static final String PREFIX = AccumuloOutputFormatTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
+ private static final String TEST_TABLE_1 = PREFIX + "_mapred_table_1";
+ private static final String TEST_TABLE_2 = PREFIX + "_mapred_table_2";
+
+ private static class MRTester extends Configured implements Tool {
+ private static class TestMapper implements Mapper<Key,Value,Text,Mutation> {
+ Key key = null;
+ int count = 0;
+ OutputCollector<Text,Mutation> finalOutput;
+
+ @Override
+ public void map(Key k, Value v, OutputCollector<Text,Mutation> output, Reporter reporter) throws IOException {
+ finalOutput = output;
+ try {
+ if (key != null)
+ assertEquals(key.getRow().toString(), new String(v.get()));
+ assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
+ assertEquals(new String(v.get()), String.format("%09x", count));
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ key = new Key(k);
+ count++;
+ }
+
+ @Override
+ public void configure(JobConf job) {}
+
+ @Override
+ public void close() throws IOException {
+ Mutation m = new Mutation("total");
+ m.put("", "", Integer.toString(count));
+ finalOutput.collect(new Text(), m);
+ }
+
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 4) {
+ throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <inputtable> <outputtable>");
+ }
+
+ String user = args[0];
+ String pass = args[1];
+ String table1 = args[2];
+ String table2 = args[3];
+
+ JobConf job = new JobConf(getConf());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormat(AccumuloInputFormat.class);
+
+ AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloInputFormat.setInputTableName(job, table1);
+ AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setMapperClass(TestMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormat(AccumuloOutputFormat.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(Mutation.class);
+
+ AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloOutputFormat.setCreateTables(job, false);
+ AccumuloOutputFormat.setDefaultTableName(job, table2);
+ AccumuloOutputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setNumReduceTasks(0);
+
+ return JobClient.runJob(job).isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
+ }
+ }
+
+ @Test
+ public void testBWSettings() throws IOException {
+ JobConf job = new JobConf();
+
+ // make sure we aren't testing defaults
+ final BatchWriterConfig bwDefaults = new BatchWriterConfig();
+ assertNotEquals(7654321l, bwDefaults.getMaxLatency(TimeUnit.MILLISECONDS));
+ assertNotEquals(9898989l, bwDefaults.getTimeout(TimeUnit.MILLISECONDS));
+ assertNotEquals(42, bwDefaults.getMaxWriteThreads());
+ assertNotEquals(1123581321l, bwDefaults.getMaxMemory());
+
+ final BatchWriterConfig bwConfig = new BatchWriterConfig();
+ bwConfig.setMaxLatency(7654321l, TimeUnit.MILLISECONDS);
+ bwConfig.setTimeout(9898989l, TimeUnit.MILLISECONDS);
+ bwConfig.setMaxWriteThreads(42);
+ bwConfig.setMaxMemory(1123581321l);
+ AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
+
+ AccumuloOutputFormat myAOF = new AccumuloOutputFormat() {
+ @Override
+ public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
+ BatchWriterConfig bwOpts = getBatchWriterOptions(job);
+
+ // passive check
+ assertEquals(bwConfig.getMaxLatency(TimeUnit.MILLISECONDS), bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
+ assertEquals(bwConfig.getTimeout(TimeUnit.MILLISECONDS), bwOpts.getTimeout(TimeUnit.MILLISECONDS));
+ assertEquals(bwConfig.getMaxWriteThreads(), bwOpts.getMaxWriteThreads());
+ assertEquals(bwConfig.getMaxMemory(), bwOpts.getMaxMemory());
+
+ // explicit check
+ assertEquals(7654321l, bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
+ assertEquals(9898989l, bwOpts.getTimeout(TimeUnit.MILLISECONDS));
+ assertEquals(42, bwOpts.getMaxWriteThreads());
+ assertEquals(1123581321l, bwOpts.getMaxMemory());
+
+ }
+ };
+ myAOF.checkOutputSpecs(null, job);
+ }
+
+ @Test
+ public void testMR() throws Exception {
+ MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
+ Connector c = mockInstance.getConnector("root", new PasswordToken(""));
+ c.tableOperations().create(TEST_TABLE_1);
+ c.tableOperations().create(TEST_TABLE_2);
+ BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
+ for (int i = 0; i < 100; i++) {
+ Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+ m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+ bw.addMutation(m);
+ }
+ bw.close();
+
+ MRTester.main(new String[] {"root", "", TEST_TABLE_1, TEST_TABLE_2});
+ assertNull(e1);
+
+ Scanner scanner = c.createScanner(TEST_TABLE_2, new Authorizations());
+ Iterator<Entry<Key,Value>> iter = scanner.iterator();
+ assertTrue(iter.hasNext());
+ Entry<Key,Value> entry = iter.next();
+ assertEquals(Integer.parseInt(new String(entry.getValue().get())), 100);
+ assertFalse(iter.hasNext());
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormatTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormatTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormatTest.java
new file mode 100644
index 0000000..a0ae0b3
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormatTest.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.KeyValue;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.PeekingIterator;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Test;
+
+public class AccumuloRowInputFormatTest {
+ private static final String PREFIX = AccumuloRowInputFormatTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
+ private static final String TEST_TABLE_1 = PREFIX + "_mapred_table_1";
+
+ private static final String ROW1 = "row1";
+ private static final String ROW2 = "row2";
+ private static final String ROW3 = "row3";
+ private static final String COLF1 = "colf1";
+ private static List<Entry<Key,Value>> row1;
+ private static List<Entry<Key,Value>> row2;
+ private static List<Entry<Key,Value>> row3;
+ private static AssertionError e1 = null;
+ private static AssertionError e2 = null;
+
+ public AccumuloRowInputFormatTest() {
+ row1 = new ArrayList<Entry<Key,Value>>();
+ row1.add(new KeyValue(new Key(ROW1, COLF1, "colq1"), "v1".getBytes()));
+ row1.add(new KeyValue(new Key(ROW1, COLF1, "colq2"), "v2".getBytes()));
+ row1.add(new KeyValue(new Key(ROW1, "colf2", "colq3"), "v3".getBytes()));
+ row2 = new ArrayList<Entry<Key,Value>>();
+ row2.add(new KeyValue(new Key(ROW2, COLF1, "colq4"), "v4".getBytes()));
+ row3 = new ArrayList<Entry<Key,Value>>();
+ row3.add(new KeyValue(new Key(ROW3, COLF1, "colq5"), "v5".getBytes()));
+ }
+
+ public static void checkLists(final List<Entry<Key,Value>> first, final List<Entry<Key,Value>> second) {
+ assertEquals("Sizes should be the same.", first.size(), second.size());
+ for (int i = 0; i < first.size(); i++) {
+ assertEquals("Keys should be equal.", first.get(i).getKey(), second.get(i).getKey());
+ assertEquals("Values should be equal.", first.get(i).getValue(), second.get(i).getValue());
+ }
+ }
+
+ public static void checkLists(final List<Entry<Key,Value>> first, final Iterator<Entry<Key,Value>> second) {
+ int entryIndex = 0;
+ while (second.hasNext()) {
+ final Entry<Key,Value> entry = second.next();
+ assertEquals("Keys should be equal", first.get(entryIndex).getKey(), entry.getKey());
+ assertEquals("Values should be equal", first.get(entryIndex).getValue(), entry.getValue());
+ entryIndex++;
+ }
+ }
+
+ public static void insertList(final BatchWriter writer, final List<Entry<Key,Value>> list) throws MutationsRejectedException {
+ for (Entry<Key,Value> e : list) {
+ final Key key = e.getKey();
+ final Mutation mutation = new Mutation(key.getRow());
+ ColumnVisibility colVisibility = new ColumnVisibility(key.getColumnVisibility());
+ mutation.put(key.getColumnFamily(), key.getColumnQualifier(), colVisibility, key.getTimestamp(), e.getValue());
+ writer.addMutation(mutation);
+ }
+ }
+
+ private static class MRTester extends Configured implements Tool {
+ public static class TestMapper implements Mapper<Text,PeekingIterator<Entry<Key,Value>>,Key,Value> {
+ int count = 0;
+
+ @Override
+ public void map(Text k, PeekingIterator<Entry<Key,Value>> v, OutputCollector<Key,Value> output, Reporter reporter) throws IOException {
+ try {
+ switch (count) {
+ case 0:
+ assertEquals("Current key should be " + ROW1, new Text(ROW1), k);
+ checkLists(row1, v);
+ break;
+ case 1:
+ assertEquals("Current key should be " + ROW2, new Text(ROW2), k);
+ checkLists(row2, v);
+ break;
+ case 2:
+ assertEquals("Current key should be " + ROW3, new Text(ROW3), k);
+ checkLists(row3, v);
+ break;
+ default:
+ assertTrue(false);
+ }
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ count++;
+ }
+
+ @Override
+ public void configure(JobConf job) {}
+
+ @Override
+ public void close() throws IOException {
+ try {
+ assertEquals(3, count);
+ } catch (AssertionError e) {
+ e2 = e;
+ }
+ }
+
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 3) {
+ throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table>");
+ }
+
+ String user = args[0];
+ String pass = args[1];
+ String table = args[2];
+
+ JobConf job = new JobConf(getConf());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormat(AccumuloRowInputFormat.class);
+
+ AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloInputFormat.setInputTableName(job, table);
+ AccumuloRowInputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setMapperClass(TestMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormat(NullOutputFormat.class);
+
+ job.setNumReduceTasks(0);
+
+ return JobClient.runJob(job).isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
+ }
+ }
+
+ @Test
+ public void test() throws Exception {
+ final MockInstance instance = new MockInstance(INSTANCE_NAME);
+ final Connector conn = instance.getConnector("root", new PasswordToken(""));
+ conn.tableOperations().create(TEST_TABLE_1);
+ BatchWriter writer = null;
+ try {
+ writer = conn.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
+ insertList(writer, row1);
+ insertList(writer, row2);
+ insertList(writer, row3);
+ } finally {
+ if (writer != null) {
+ writer.close();
+ }
+ }
+ MRTester.main(new String[] {"root", "", TEST_TABLE_1});
+ assertNull(e1);
+ assertNull(e2);
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/TokenFileTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/TokenFileTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/TokenFileTest.java
new file mode 100644
index 0000000..0e1fe39
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/TokenFileTest.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.security.Credentials;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+/**
+ * Tests the token file variant of setConnectorInfo: credentials are serialized to a file, and both the input and output formats authenticate from it in a simple mock-instance job.
+ */
+public class TokenFileTest {
+ private static AssertionError e1 = null;
+ private static final String PREFIX = TokenFileTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
+ private static final String TEST_TABLE_1 = PREFIX + "_mapred_table_1";
+ private static final String TEST_TABLE_2 = PREFIX + "_mapred_table_2";
+
+ private static class MRTokenFileTester extends Configured implements Tool {
+ private static class TestMapper implements Mapper<Key,Value,Text,Mutation> {
+ Key key = null;
+ int count = 0;
+ OutputCollector<Text,Mutation> finalOutput;
+
+ @Override
+ public void map(Key k, Value v, OutputCollector<Text,Mutation> output, Reporter reporter) throws IOException {
+ finalOutput = output;
+ try {
+ if (key != null)
+ assertEquals(key.getRow().toString(), new String(v.get()));
+ assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
+ assertEquals(new String(v.get()), String.format("%09x", count));
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ key = new Key(k);
+ count++;
+ }
+
+ @Override
+ public void configure(JobConf job) {}
+
+ @Override
+ public void close() throws IOException {
+ Mutation m = new Mutation("total");
+ m.put("", "", Integer.toString(count));
+ finalOutput.collect(new Text(), m);
+ }
+
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 4) {
+ throw new IllegalArgumentException("Usage : " + MRTokenFileTester.class.getName() + " <user> <token file> <inputtable> <outputtable>");
+ }
+
+ String user = args[0];
+ String tokenFile = args[1];
+ String table1 = args[2];
+ String table2 = args[3];
+
+ JobConf job = new JobConf(getConf());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormat(AccumuloInputFormat.class);
+
+ AccumuloInputFormat.setConnectorInfo(job, user, tokenFile);
+ AccumuloInputFormat.setInputTableName(job, table1);
+ AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setMapperClass(TestMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormat(AccumuloOutputFormat.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(Mutation.class);
+
+ AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
+ AccumuloOutputFormat.setCreateTables(job, false);
+ AccumuloOutputFormat.setDefaultTableName(job, table2);
+ AccumuloOutputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setNumReduceTasks(0);
+
+ return JobClient.runJob(job).isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = CachedConfiguration.getInstance();
+ conf.set("hadoop.tmp.dir", new File(args[1]).getParent());
+ assertEquals(0, ToolRunner.run(conf, new MRTokenFileTester(), args));
+ }
+ }
+
+ @Rule
+ public TemporaryFolder folder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
+
+ @Test
+ public void testMR() throws Exception {
+ MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
+ Connector c = mockInstance.getConnector("root", new PasswordToken(""));
+ c.tableOperations().create(TEST_TABLE_1);
+ c.tableOperations().create(TEST_TABLE_2);
+ BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
+ for (int i = 0; i < 100; i++) {
+ Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+ m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+ bw.addMutation(m);
+ }
+ bw.close();
+
+ File tf = folder.newFile("root_test.pw");
+ PrintStream out = new PrintStream(tf);
+ String outString = new Credentials("root", new PasswordToken("")).serialize();
+ out.println(outString);
+ out.close();
+
+ MRTokenFileTester.main(new String[] {"root", tf.getAbsolutePath(), TEST_TABLE_1, TEST_TABLE_2});
+ assertNull(e1);
+
+ Scanner scanner = c.createScanner(TEST_TABLE_2, new Authorizations());
+ Iterator<Entry<Key,Value>> iter = scanner.iterator();
+ assertTrue(iter.hasNext());
+ Entry<Key,Value> entry = iter.next();
+ assertEquals(Integer.parseInt(new String(entry.getValue().get())), 100);
+ assertFalse(iter.hasNext());
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java
new file mode 100644
index 0000000..2a453e3
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class AccumuloFileOutputFormatTest {
+ private static final String PREFIX = AccumuloFileOutputFormatTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
+ private static final String BAD_TABLE = PREFIX + "_mapreduce_bad_table";
+ private static final String TEST_TABLE = PREFIX + "_mapreduce_test_table";
+ private static final String EMPTY_TABLE = PREFIX + "_mapreduce_empty_table";
+
+ private static AssertionError e1 = null;
+ private static AssertionError e2 = null;
+
+ @Rule
+ public TemporaryFolder folder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
+ Connector c = mockInstance.getConnector("root", new PasswordToken(""));
+ c.tableOperations().create(EMPTY_TABLE);
+ c.tableOperations().create(TEST_TABLE);
+ c.tableOperations().create(BAD_TABLE);
+ BatchWriter bw = c.createBatchWriter(TEST_TABLE, new BatchWriterConfig());
+ Mutation m = new Mutation("Key");
+ m.put("", "", "");
+ bw.addMutation(m);
+ bw.close();
+ bw = c.createBatchWriter(BAD_TABLE, new BatchWriterConfig());
+ m = new Mutation("r1");
+ m.put("cf1", "cq1", "A&B");
+ m.put("cf1", "cq1", "A&B");
+ m.put("cf1", "cq2", "A&");
+ bw.addMutation(m);
+ bw.close();
+ }
+
+ @Test
+ public void testEmptyWrite() throws Exception {
+ handleWriteTests(false);
+ }
+
+ @Test
+ public void testRealWrite() throws Exception {
+ handleWriteTests(true);
+ }
+
+ private static class MRTester extends Configured implements Tool {
+ private static class BadKeyMapper extends Mapper<Key,Value,Key,Value> {
+ int index = 0;
+
+ @Override
+ protected void map(Key key, Value value, Context context) throws IOException, InterruptedException {
+ try {
+ try {
+ context.write(key, value);
+ if (index == 2)
+ assertTrue(false);
+ } catch (Exception e) {
+ assertEquals(2, index);
+ }
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ index++;
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ try {
+ assertEquals(2, index);
+ } catch (AssertionError e) {
+ e2 = e;
+ }
+ }
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 4) {
+ throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table> <outputfile>");
+ }
+
+ String user = args[0];
+ String pass = args[1];
+ String table = args[2];
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormatClass(AccumuloInputFormat.class);
+
+ AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloInputFormat.setInputTableName(job, table);
+ AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
+ AccumuloFileOutputFormat.setOutputPath(job, new Path(args[3]));
+
+ job.setMapperClass(BAD_TABLE.equals(table) ? BadKeyMapper.class : Mapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormatClass(AccumuloFileOutputFormat.class);
+
+ job.setNumReduceTasks(0);
+
+ job.waitForCompletion(true);
+
+ return job.isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
+ }
+ }
+
+ public void handleWriteTests(boolean content) throws Exception {
+ File f = folder.newFile("handleWriteTests");
+ f.delete();
+ MRTester.main(new String[] {"root", "", content ? TEST_TABLE : EMPTY_TABLE, f.getAbsolutePath()});
+
+ assertTrue(f.exists());
+ File[] files = f.listFiles(new FileFilter() {
+ @Override
+ public boolean accept(File file) {
+ return file.getName().startsWith("part-m-");
+ }
+ });
+ if (content) {
+ assertEquals(1, files.length);
+ assertTrue(files[0].exists());
+ } else {
+ assertEquals(0, files.length);
+ }
+ }
+
+ @Test
+ public void writeBadVisibility() throws Exception {
+ File f = folder.newFile("writeBadVisibility");
+ f.delete();
+ MRTester.main(new String[] {"root", "", BAD_TABLE, f.getAbsolutePath()});
+ assertNull(e1);
+ assertNull(e2);
+ }
+
+ @Test
+ public void validateConfiguration() throws IOException, InterruptedException {
+
+ int a = 7;
+ long b = 300l;
+ long c = 50l;
+ long d = 10l;
+ String e = "snappy";
+
+ @SuppressWarnings("deprecation")
+ Job job1 = new Job();
+ AccumuloFileOutputFormat.setReplication(job1, a);
+ AccumuloFileOutputFormat.setFileBlockSize(job1, b);
+ AccumuloFileOutputFormat.setDataBlockSize(job1, c);
+ AccumuloFileOutputFormat.setIndexBlockSize(job1, d);
+ AccumuloFileOutputFormat.setCompressionType(job1, e);
+
+ AccumuloConfiguration acuconf = AccumuloFileOutputFormat.getAccumuloConfiguration(job1);
+
+ assertEquals(7, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
+ assertEquals(300l, acuconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE));
+ assertEquals(50l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
+ assertEquals(10l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
+ assertEquals("snappy", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
+
+ a = 17;
+ b = 1300l;
+ c = 150l;
+ d = 110l;
+ e = "lzo";
+
+ @SuppressWarnings("deprecation")
+ Job job2 = new Job();
+ AccumuloFileOutputFormat.setReplication(job2, a);
+ AccumuloFileOutputFormat.setFileBlockSize(job2, b);
+ AccumuloFileOutputFormat.setDataBlockSize(job2, c);
+ AccumuloFileOutputFormat.setIndexBlockSize(job2, d);
+ AccumuloFileOutputFormat.setCompressionType(job2, e);
+
+ acuconf = AccumuloFileOutputFormat.getAccumuloConfiguration(job2);
+
+ assertEquals(17, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
+ assertEquals(1300l, acuconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE));
+ assertEquals(150l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
+ assertEquals(110l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
+ assertEquals("lzo", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
+
+ }
+}
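A minimal driver sketch for the AccumuloFileOutputFormat calls exercised by the test above; the output path, block sizes, and replication value are placeholders, not values taken from this change:

    import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Value;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;

    public class FileOutputSketch {
      public static Job configure() throws Exception {
        @SuppressWarnings("deprecation")
        Job job = new Job();                                   // same deprecated constructor the tests use
        job.setOutputFormatClass(AccumuloFileOutputFormat.class);
        job.setMapOutputKeyClass(Key.class);                   // RFiles are written as sorted Key/Value pairs
        job.setMapOutputValueClass(Value.class);
        job.setNumReduceTasks(0);
        AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp/bulk-out")); // placeholder path
        AccumuloFileOutputFormat.setCompressionType(job, "gz");                 // none, gz, lzo, or snappy
        AccumuloFileOutputFormat.setDataBlockSize(job, 64 * 1024);
        AccumuloFileOutputFormat.setIndexBlockSize(job, 32 * 1024);
        AccumuloFileOutputFormat.setReplication(job, 3);
        return job;
      }
    }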
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java
new file mode 100644
index 0000000..2500972
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java
@@ -0,0 +1,412 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.RegExFilter;
+import org.apache.accumulo.core.iterators.user.WholeRowIterator;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Level;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class AccumuloInputFormatTest {
+
+ private static final String PREFIX = AccumuloInputFormatTest.class.getSimpleName();
+
+ /**
+ * Check that the iterator configuration is getting stored in the Job conf correctly.
+ */
+ @Test
+ public void testSetIterator() throws IOException {
+ @SuppressWarnings("deprecation")
+ Job job = new Job();
+
+ IteratorSetting is = new IteratorSetting(1, "WholeRow", "org.apache.accumulo.core.iterators.WholeRowIterator");
+ AccumuloInputFormat.addIterator(job, is);
+ Configuration conf = job.getConfiguration();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ is.write(new DataOutputStream(baos));
+ String iterators = conf.get("AccumuloInputFormat.ScanOpts.Iterators");
+ assertEquals(new String(Base64.encodeBase64(baos.toByteArray())), iterators);
+ }
+
+ @Test
+ public void testAddIterator() throws IOException {
+ @SuppressWarnings("deprecation")
+ Job job = new Job();
+
+ AccumuloInputFormat.addIterator(job, new IteratorSetting(1, "WholeRow", WholeRowIterator.class));
+ AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
+ IteratorSetting iter = new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator");
+ iter.addOption("v1", "1");
+ iter.addOption("junk", "\0omg:!\\xyzzy");
+ AccumuloInputFormat.addIterator(job, iter);
+
+ List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
+
+ // Check the list size
+ assertTrue(list.size() == 3);
+
+ // Walk the list and make sure our settings are correct
+ IteratorSetting setting = list.get(0);
+ assertEquals(1, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.user.WholeRowIterator", setting.getIteratorClass());
+ assertEquals("WholeRow", setting.getName());
+ assertEquals(0, setting.getOptions().size());
+
+ setting = list.get(1);
+ assertEquals(2, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
+ assertEquals("Versions", setting.getName());
+ assertEquals(0, setting.getOptions().size());
+
+ setting = list.get(2);
+ assertEquals(3, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
+ assertEquals("Count", setting.getName());
+ assertEquals(2, setting.getOptions().size());
+ assertEquals("1", setting.getOptions().get("v1"));
+ assertEquals("\0omg:!\\xyzzy", setting.getOptions().get("junk"));
+ }
+
+ /**
+ * Test adding iterator options where the keys and values contain both the FIELD_SEPARATOR character (':') and ITERATOR_SEPARATOR (',') characters. There
+ * should be no exceptions thrown when trying to parse these types of option entries.
+ *
+ * This test makes sure that the raw values, as they appear in the Job configuration, match what was set.
+ */
+ @Test
+ public void testIteratorOptionEncoding() throws Throwable {
+ String key = "colon:delimited:key";
+ String value = "comma,delimited,value";
+ IteratorSetting someSetting = new IteratorSetting(1, "iterator", "Iterator.class");
+ someSetting.addOption(key, value);
+ @SuppressWarnings("deprecation")
+ Job job = new Job();
+ AccumuloInputFormat.addIterator(job, someSetting);
+
+ List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
+ assertEquals(1, list.size());
+ assertEquals(1, list.get(0).getOptions().size());
+ assertEquals(list.get(0).getOptions().get(key), value);
+
+ someSetting.addOption(key + "2", value);
+ someSetting.setPriority(2);
+ someSetting.setName("it2");
+ AccumuloInputFormat.addIterator(job, someSetting);
+ list = AccumuloInputFormat.getIterators(job);
+ assertEquals(2, list.size());
+ assertEquals(1, list.get(0).getOptions().size());
+ assertEquals(list.get(0).getOptions().get(key), value);
+ assertEquals(2, list.get(1).getOptions().size());
+ assertEquals(list.get(1).getOptions().get(key), value);
+ assertEquals(list.get(1).getOptions().get(key + "2"), value);
+ }
+
+ /**
+ * Test getting iterator settings for multiple iterators set
+ */
+ @Test
+ public void testGetIteratorSettings() throws IOException {
+ @SuppressWarnings("deprecation")
+ Job job = new Job();
+
+ AccumuloInputFormat.addIterator(job, new IteratorSetting(1, "WholeRow", "org.apache.accumulo.core.iterators.WholeRowIterator"));
+ AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
+ AccumuloInputFormat.addIterator(job, new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator"));
+
+ List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
+
+ // Check the list size
+ assertTrue(list.size() == 3);
+
+ // Walk the list and make sure our settings are correct
+ IteratorSetting setting = list.get(0);
+ assertEquals(1, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.WholeRowIterator", setting.getIteratorClass());
+ assertEquals("WholeRow", setting.getName());
+
+ setting = list.get(1);
+ assertEquals(2, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
+ assertEquals("Versions", setting.getName());
+
+ setting = list.get(2);
+ assertEquals(3, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
+ assertEquals("Count", setting.getName());
+
+ }
+
+ @Test
+ public void testSetRegex() throws IOException {
+ @SuppressWarnings("deprecation")
+ Job job = new Job();
+
+ String regex = ">\"*%<>\'\\";
+
+ IteratorSetting is = new IteratorSetting(50, regex, RegExFilter.class);
+ RegExFilter.setRegexs(is, regex, null, null, null, false);
+ AccumuloInputFormat.addIterator(job, is);
+
+ assertTrue(regex.equals(AccumuloInputFormat.getIterators(job).get(0).getName()));
+ }
+
+ private static AssertionError e1 = null;
+ private static AssertionError e2 = null;
+
+ private static class MRTester extends Configured implements Tool {
+ private static class TestMapper extends Mapper<Key,Value,Key,Value> {
+ Key key = null;
+ int count = 0;
+
+ @Override
+ protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
+ try {
+ if (key != null)
+ assertEquals(key.getRow().toString(), new String(v.get()));
+ assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
+ assertEquals(new String(v.get()), String.format("%09x", count));
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ key = new Key(k);
+ count++;
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ try {
+ assertEquals(100, count);
+ } catch (AssertionError e) {
+ e2 = e;
+ }
+ }
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 5) {
+ throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table> <instanceName> <inputFormatClass>");
+ }
+
+ String user = args[0];
+ String pass = args[1];
+ String table = args[2];
+
+ String instanceName = args[3];
+ String inputFormatClassName = args[4];
+ @SuppressWarnings("unchecked")
+ Class<? extends InputFormat<?,?>> inputFormatClass = (Class<? extends InputFormat<?,?>>) Class.forName(inputFormatClassName);
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormatClass(inputFormatClass);
+
+ AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloInputFormat.setInputTableName(job, table);
+ AccumuloInputFormat.setMockInstance(job, instanceName);
+
+ job.setMapperClass(TestMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+
+ job.setNumReduceTasks(0);
+
+ job.waitForCompletion(true);
+
+ return job.isSuccessful() ? 0 : 1;
+ }
+
+ public static int main(String[] args) throws Exception {
+ return ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args);
+ }
+ }
+
+ @Test
+ public void testMap() throws Exception {
+ final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
+ final String TEST_TABLE_1 = PREFIX + "_mapreduce_table_1";
+
+ MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
+ Connector c = mockInstance.getConnector("root", new PasswordToken(""));
+ c.tableOperations().create(TEST_TABLE_1);
+ BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
+ for (int i = 0; i < 100; i++) {
+ Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+ m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+ bw.addMutation(m);
+ }
+ bw.close();
+
+ Assert.assertEquals(0, MRTester.main(new String[] {"root", "", TEST_TABLE_1, INSTANCE_NAME, AccumuloInputFormat.class.getCanonicalName()}));
+ assertNull(e1);
+ assertNull(e2);
+ }
+
+ @Test
+ public void testCorrectRangeInputSplits() throws Exception {
+ @SuppressWarnings("deprecation")
+ Job job = new Job(new Configuration(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+
+ String username = "user", table = "table", instance = "instance";
+ PasswordToken password = new PasswordToken("password");
+ Authorizations auths = new Authorizations("foo");
+ Collection<Pair<Text,Text>> fetchColumns = Collections.singleton(new Pair<Text,Text>(new Text("foo"), new Text("bar")));
+ boolean isolated = true, localIters = true;
+ Level level = Level.WARN;
+
+ Instance inst = new MockInstance(instance);
+ Connector connector = inst.getConnector(username, password);
+ connector.tableOperations().create(table);
+
+ AccumuloInputFormat.setConnectorInfo(job, username, password);
+ AccumuloInputFormat.setInputTableName(job, table);
+ AccumuloInputFormat.setScanAuthorizations(job, auths);
+ AccumuloInputFormat.setMockInstance(job, instance);
+ AccumuloInputFormat.setScanIsolation(job, isolated);
+ AccumuloInputFormat.setLocalIterators(job, localIters);
+ AccumuloInputFormat.fetchColumns(job, fetchColumns);
+ AccumuloInputFormat.setLogLevel(job, level);
+
+ AccumuloInputFormat aif = new AccumuloInputFormat();
+
+ List<InputSplit> splits = aif.getSplits(job);
+
+ Assert.assertEquals(1, splits.size());
+
+ InputSplit split = splits.get(0);
+
+ Assert.assertEquals(RangeInputSplit.class, split.getClass());
+
+ RangeInputSplit risplit = (RangeInputSplit) split;
+
+ Assert.assertEquals(username, risplit.getPrincipal());
+ Assert.assertEquals(table, risplit.getTableName());
+ Assert.assertEquals(password, risplit.getToken());
+ Assert.assertEquals(auths, risplit.getAuths());
+ Assert.assertEquals(instance, risplit.getInstanceName());
+ Assert.assertEquals(isolated, risplit.isIsolatedScan());
+ Assert.assertEquals(localIters, risplit.usesLocalIterators());
+ Assert.assertEquals(fetchColumns, risplit.getFetchedColumns());
+ Assert.assertEquals(level, risplit.getLogLevel());
+ }
+
+ @Test
+ public void testPartialInputSplitDelegationToConfiguration() throws Exception {
+ String user = "testPartialInputSplitUser";
+ PasswordToken password = new PasswordToken("");
+
+ MockInstance mockInstance = new MockInstance("testPartialInputSplitDelegationToConfiguration");
+ Connector c = mockInstance.getConnector(user, password);
+ c.tableOperations().create("testtable");
+ BatchWriter bw = c.createBatchWriter("testtable", new BatchWriterConfig());
+ for (int i = 0; i < 100; i++) {
+ Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+ m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+ bw.addMutation(m);
+ }
+ bw.close();
+
+ Assert.assertEquals(
+ 0,
+ MRTester.main(new String[] {user, "", "testtable", "testPartialInputSplitDelegationToConfiguration",
+ EmptySplitsAccumuloInputFormat.class.getCanonicalName()}));
+ assertNull(e1);
+ assertNull(e2);
+ }
+
+ @Test
+ public void testPartialFailedInputSplitDelegationToConfiguration() throws Exception {
+ String user = "testPartialFailedInputSplit";
+ PasswordToken password = new PasswordToken("");
+
+ MockInstance mockInstance = new MockInstance("testPartialFailedInputSplitDelegationToConfiguration");
+ Connector c = mockInstance.getConnector(user, password);
+ c.tableOperations().create("testtable");
+ BatchWriter bw = c.createBatchWriter("testtable", new BatchWriterConfig());
+ for (int i = 0; i < 100; i++) {
+ Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+ m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+ bw.addMutation(m);
+ }
+ bw.close();
+
+ // We should fail before we even get into the Mapper because we can't make the RecordReader
+ Assert.assertEquals(
+ 1,
+ MRTester.main(new String[] {user, "", "testtable", "testPartialFailedInputSplitDelegationToConfiguration",
+ BadPasswordSplitsAccumuloInputFormat.class.getCanonicalName()}));
+ assertNull(e1);
+ assertNull(e2);
+ }
+
+ @Test
+ public void testEmptyColumnFamily() throws IOException {
+ @SuppressWarnings("deprecation")
+ Job job = new Job();
+ Set<Pair<Text,Text>> cols = new HashSet<Pair<Text,Text>>();
+ cols.add(new Pair<Text,Text>(new Text(""), null));
+ cols.add(new Pair<Text,Text>(new Text("foo"), new Text("bar")));
+ cols.add(new Pair<Text,Text>(new Text(""), new Text("bar")));
+ cols.add(new Pair<Text,Text>(new Text(""), new Text("")));
+ cols.add(new Pair<Text,Text>(new Text("foo"), new Text("")));
+ AccumuloInputFormat.fetchColumns(job, cols);
+ Set<Pair<Text,Text>> setCols = AccumuloInputFormat.getFetchedColumns(job);
+ assertEquals(cols, setCols);
+ }
+}
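As a companion to the iterator tests above, this is a minimal sketch of the client-side configuration they exercise; the principal, token, instance, table name, and regex are placeholders rather than values from this change:

    import org.apache.accumulo.core.client.IteratorSetting;
    import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
    import org.apache.accumulo.core.client.security.tokens.PasswordToken;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.iterators.user.RegExFilter;
    import org.apache.hadoop.mapreduce.Job;

    public class InputFormatSketch {
      public static void configure(Job job) throws Exception {
        job.setInputFormatClass(AccumuloInputFormat.class);
        job.setMapOutputKeyClass(Key.class);
        job.setMapOutputValueClass(Value.class);
        // Placeholder credentials, instance, and table -- not values from this patch.
        AccumuloInputFormat.setConnectorInfo(job, "reader", new PasswordToken("secret"));
        AccumuloInputFormat.setMockInstance(job, "sketch_instance");
        AccumuloInputFormat.setInputTableName(job, "sketch_table");
        // Iterator settings are serialized into the Job configuration, as testSetIterator checks.
        IteratorSetting regex = new IteratorSetting(50, "rowFilter", RegExFilter.class);
        RegExFilter.setRegexs(regex, "row.*", null, null, null, false);
        AccumuloInputFormat.addIterator(job, regex);
      }
    }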
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormatTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormatTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormatTest.java
new file mode 100644
index 0000000..05fbbb4
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormatTest.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Test;
+
+public class AccumuloMultiTableInputFormatTest {
+
+ private static final String PREFIX = AccumuloMultiTableInputFormatTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
+ private static final String TEST_TABLE_1 = PREFIX + "_mapreduce_table_1";
+ private static final String TEST_TABLE_2 = PREFIX + "_mapreduce_table_2";
+
+ private static AssertionError e1 = null;
+ private static AssertionError e2 = null;
+
+ private static class MRTester extends Configured implements Tool {
+
+ private static class TestMapper extends Mapper<Key,Value,Key,Value> {
+ Key key = null;
+ int count = 0;
+
+ @Override
+ protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
+ try {
+ String tableName = ((RangeInputSplit) context.getInputSplit()).getTableName();
+ if (key != null)
+ assertEquals(key.getRow().toString(), new String(v.get()));
+ assertEquals(new Text(String.format("%s_%09x", tableName, count + 1)), k.getRow());
+ assertEquals(String.format("%s_%09x", tableName, count), new String(v.get()));
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ key = new Key(k);
+ count++;
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ try {
+ assertEquals(100, count);
+ } catch (AssertionError e) {
+ e2 = e;
+ }
+ }
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 4) {
+ throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table1> <table2>");
+ }
+
+ String user = args[0];
+ String pass = args[1];
+ String table1 = args[2];
+ String table2 = args[3];
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormatClass(AccumuloMultiTableInputFormat.class);
+
+ AccumuloMultiTableInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+
+ InputTableConfig tableConfig1 = new InputTableConfig();
+ InputTableConfig tableConfig2 = new InputTableConfig();
+
+ Map<String,InputTableConfig> configMap = new HashMap<String,InputTableConfig>();
+ configMap.put(table1, tableConfig1);
+ configMap.put(table2, tableConfig2);
+
+ AccumuloMultiTableInputFormat.setInputTableConfigs(job, configMap);
+ AccumuloMultiTableInputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setMapperClass(TestMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+
+ job.setNumReduceTasks(0);
+
+ job.waitForCompletion(true);
+
+ return job.isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
+ }
+ }
+
+ /**
+ * Generate incrementing counts and attach table name to the key/value so that order and multi-table data can be verified.
+ */
+ @Test
+ public void testMap() throws Exception {
+ MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
+ Connector c = mockInstance.getConnector("root", new PasswordToken(""));
+ c.tableOperations().create(TEST_TABLE_1);
+ c.tableOperations().create(TEST_TABLE_2);
+ BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
+ BatchWriter bw2 = c.createBatchWriter(TEST_TABLE_2, new BatchWriterConfig());
+ for (int i = 0; i < 100; i++) {
+ Mutation t1m = new Mutation(new Text(String.format("%s_%09x", TEST_TABLE_1, i + 1)));
+ t1m.put(new Text(), new Text(), new Value(String.format("%s_%09x", TEST_TABLE_1, i).getBytes()));
+ bw.addMutation(t1m);
+ Mutation t2m = new Mutation(new Text(String.format("%s_%09x", TEST_TABLE_2, i + 1)));
+ t2m.put(new Text(), new Text(), new Value(String.format("%s_%09x", TEST_TABLE_2, i).getBytes()));
+ bw2.addMutation(t2m);
+ }
+ bw.close();
+ bw2.close();
+
+ MRTester.main(new String[] {"root", "", TEST_TABLE_1, TEST_TABLE_2});
+ assertNull(e1);
+ assertNull(e2);
+ }
+
+ /**
+ * Verify {@link InputTableConfig} objects get correctly serialized in the JobContext.
+ */
+ @Test
+ public void testInputTableConfigSerialization() throws IOException {
+ @SuppressWarnings("deprecation")
+ Job job = new Job();
+
+ InputTableConfig tableConfig = new InputTableConfig().setRanges(Collections.singletonList(new Range("a", "b")))
+ .fetchColumns(Collections.singleton(new Pair<Text,Text>(new Text("CF1"), new Text("CQ1"))))
+ .setIterators(Collections.singletonList(new IteratorSetting(50, "iter1", "iterclass1")));
+
+ Map<String,InputTableConfig> configMap = new HashMap<String,InputTableConfig>();
+ configMap.put(TEST_TABLE_1, tableConfig);
+ configMap.put(TEST_TABLE_2, tableConfig);
+
+ AccumuloMultiTableInputFormat.setInputTableConfigs(job, configMap);
+
+ assertEquals(tableConfig, AccumuloMultiTableInputFormat.getInputTableConfig(job, TEST_TABLE_1));
+ assertEquals(tableConfig, AccumuloMultiTableInputFormat.getInputTableConfig(job, TEST_TABLE_2));
+ }
+
+}
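Configuring the multi-table format outside a test follows the same pattern as the tests above; the table names, credentials, and range below are arbitrary placeholders:

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.accumulo.core.client.mapreduce.AccumuloMultiTableInputFormat;
    import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
    import org.apache.accumulo.core.client.security.tokens.PasswordToken;
    import org.apache.accumulo.core.data.Range;
    import org.apache.hadoop.mapreduce.Job;

    public class MultiTableSketch {
      public static void configure(Job job) throws Exception {
        job.setInputFormatClass(AccumuloMultiTableInputFormat.class);
        AccumuloMultiTableInputFormat.setConnectorInfo(job, "reader", new PasswordToken("secret"));
        AccumuloMultiTableInputFormat.setMockInstance(job, "sketch_instance");
        // Each table carries its own ranges/iterators/columns in an InputTableConfig.
        InputTableConfig recent = new InputTableConfig().setRanges(Collections.singletonList(new Range("2014")));
        InputTableConfig everything = new InputTableConfig();
        Map<String,InputTableConfig> configs = new HashMap<String,InputTableConfig>();
        configs.put("events_recent", recent);      // placeholder table names
        configs.put("events_archive", everything);
        AccumuloMultiTableInputFormat.setInputTableConfigs(job, configs);
      }
    }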
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormatTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormatTest.java
new file mode 100644
index 0000000..a0cb4e3
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormatTest.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map.Entry;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Test;
+
+/**
+ *
+ */
+public class AccumuloOutputFormatTest {
+ private static AssertionError e1 = null;
+ private static final String PREFIX = AccumuloOutputFormatTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
+ private static final String TEST_TABLE_1 = PREFIX + "_mapreduce_table_1";
+ private static final String TEST_TABLE_2 = PREFIX + "_mapreduce_table_2";
+
+ private static class MRTester extends Configured implements Tool {
+ private static class TestMapper extends Mapper<Key,Value,Text,Mutation> {
+ Key key = null;
+ int count = 0;
+
+ @Override
+ protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
+ try {
+ if (key != null)
+ assertEquals(key.getRow().toString(), new String(v.get()));
+ assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
+ assertEquals(new String(v.get()), String.format("%09x", count));
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ key = new Key(k);
+ count++;
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ Mutation m = new Mutation("total");
+ m.put("", "", Integer.toString(count));
+ context.write(new Text(), m);
+ }
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 4) {
+ throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <inputtable> <outputtable>");
+ }
+
+ String user = args[0];
+ String pass = args[1];
+ String table1 = args[2];
+ String table2 = args[3];
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormatClass(AccumuloInputFormat.class);
+
+ AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloInputFormat.setInputTableName(job, table1);
+ AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setMapperClass(TestMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormatClass(AccumuloOutputFormat.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(Mutation.class);
+
+ AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloOutputFormat.setCreateTables(job, false);
+ AccumuloOutputFormat.setDefaultTableName(job, table2);
+ AccumuloOutputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setNumReduceTasks(0);
+
+ job.waitForCompletion(true);
+
+ return job.isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
+ }
+ }
+
+ @Test
+ public void testBWSettings() throws IOException {
+ @SuppressWarnings("deprecation")
+ Job job = new Job();
+
+ // make sure we aren't testing defaults
+ final BatchWriterConfig bwDefaults = new BatchWriterConfig();
+ assertNotEquals(7654321l, bwDefaults.getMaxLatency(TimeUnit.MILLISECONDS));
+ assertNotEquals(9898989l, bwDefaults.getTimeout(TimeUnit.MILLISECONDS));
+ assertNotEquals(42, bwDefaults.getMaxWriteThreads());
+ assertNotEquals(1123581321l, bwDefaults.getMaxMemory());
+
+ final BatchWriterConfig bwConfig = new BatchWriterConfig();
+ bwConfig.setMaxLatency(7654321l, TimeUnit.MILLISECONDS);
+ bwConfig.setTimeout(9898989l, TimeUnit.MILLISECONDS);
+ bwConfig.setMaxWriteThreads(42);
+ bwConfig.setMaxMemory(1123581321l);
+ AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
+
+ AccumuloOutputFormat myAOF = new AccumuloOutputFormat() {
+ @Override
+ public void checkOutputSpecs(JobContext job) throws IOException {
+ BatchWriterConfig bwOpts = getBatchWriterOptions(job);
+
+ // passive check
+ assertEquals(bwConfig.getMaxLatency(TimeUnit.MILLISECONDS), bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
+ assertEquals(bwConfig.getTimeout(TimeUnit.MILLISECONDS), bwOpts.getTimeout(TimeUnit.MILLISECONDS));
+ assertEquals(bwConfig.getMaxWriteThreads(), bwOpts.getMaxWriteThreads());
+ assertEquals(bwConfig.getMaxMemory(), bwOpts.getMaxMemory());
+
+ // explicit check
+ assertEquals(7654321l, bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
+ assertEquals(9898989l, bwOpts.getTimeout(TimeUnit.MILLISECONDS));
+ assertEquals(42, bwOpts.getMaxWriteThreads());
+ assertEquals(1123581321l, bwOpts.getMaxMemory());
+
+ }
+ };
+ myAOF.checkOutputSpecs(job);
+ }
+
+ @Test
+ public void testMR() throws Exception {
+ MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
+ Connector c = mockInstance.getConnector("root", new PasswordToken(""));
+ c.tableOperations().create(TEST_TABLE_1);
+ c.tableOperations().create(TEST_TABLE_2);
+ BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
+ for (int i = 0; i < 100; i++) {
+ Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+ m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+ bw.addMutation(m);
+ }
+ bw.close();
+
+ MRTester.main(new String[] {"root", "", TEST_TABLE_1, TEST_TABLE_2});
+ assertNull(e1);
+
+ Scanner scanner = c.createScanner(TEST_TABLE_2, new Authorizations());
+ Iterator<Entry<Key,Value>> iter = scanner.iterator();
+ assertTrue(iter.hasNext());
+ Entry<Key,Value> entry = iter.next();
+ assertEquals(Integer.parseInt(new String(entry.getValue().get())), 100);
+ assertFalse(iter.hasNext());
+ }
+}
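The BatchWriter tuning checked by testBWSettings would be applied in a real driver along these lines; the credentials, instance, table name, and numbers are placeholders, not values from this change:

    import java.util.concurrent.TimeUnit;

    import org.apache.accumulo.core.client.BatchWriterConfig;
    import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
    import org.apache.accumulo.core.client.security.tokens.PasswordToken;
    import org.apache.accumulo.core.data.Mutation;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;

    public class OutputFormatSketch {
      public static void configure(Job job) throws Exception {
        job.setOutputFormatClass(AccumuloOutputFormat.class);
        job.setOutputKeyClass(Text.class);        // the Text key selects the destination table; null uses the default
        job.setOutputValueClass(Mutation.class);
        AccumuloOutputFormat.setConnectorInfo(job, "writer", new PasswordToken("secret"));
        AccumuloOutputFormat.setMockInstance(job, "sketch_instance");
        AccumuloOutputFormat.setDefaultTableName(job, "sketch_table");
        AccumuloOutputFormat.setCreateTables(job, true);
        BatchWriterConfig bwConfig = new BatchWriterConfig();
        bwConfig.setMaxMemory(64 * 1024 * 1024);        // buffer up to 64MB of mutations client-side
        bwConfig.setMaxLatency(30, TimeUnit.SECONDS);   // flush at least every 30 seconds
        bwConfig.setMaxWriteThreads(4);
        AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
      }
    }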
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java
new file mode 100644
index 0000000..2207437
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.KeyValue;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.PeekingIterator;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Test;
+
+public class AccumuloRowInputFormatTest {
+ private static final String PREFIX = AccumuloRowInputFormatTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
+ private static final String TEST_TABLE_1 = PREFIX + "_mapreduce_table_1";
+
+ private static final String ROW1 = "row1";
+ private static final String ROW2 = "row2";
+ private static final String ROW3 = "row3";
+ private static final String COLF1 = "colf1";
+ private static List<Entry<Key,Value>> row1;
+ private static List<Entry<Key,Value>> row2;
+ private static List<Entry<Key,Value>> row3;
+ private static AssertionError e1 = null;
+ private static AssertionError e2 = null;
+
+ public AccumuloRowInputFormatTest() {
+ row1 = new ArrayList<Entry<Key,Value>>();
+ row1.add(new KeyValue(new Key(ROW1, COLF1, "colq1"), "v1".getBytes()));
+ row1.add(new KeyValue(new Key(ROW1, COLF1, "colq2"), "v2".getBytes()));
+ row1.add(new KeyValue(new Key(ROW1, "colf2", "colq3"), "v3".getBytes()));
+ row2 = new ArrayList<Entry<Key,Value>>();
+ row2.add(new KeyValue(new Key(ROW2, COLF1, "colq4"), "v4".getBytes()));
+ row3 = new ArrayList<Entry<Key,Value>>();
+ row3.add(new KeyValue(new Key(ROW3, COLF1, "colq5"), "v5".getBytes()));
+ }
+
+ public static void checkLists(final List<Entry<Key,Value>> first, final List<Entry<Key,Value>> second) {
+ assertEquals("Sizes should be the same.", first.size(), second.size());
+ for (int i = 0; i < first.size(); i++) {
+ assertEquals("Keys should be equal.", first.get(i).getKey(), second.get(i).getKey());
+ assertEquals("Values should be equal.", first.get(i).getValue(), second.get(i).getValue());
+ }
+ }
+
+ public static void checkLists(final List<Entry<Key,Value>> first, final Iterator<Entry<Key,Value>> second) {
+ int entryIndex = 0;
+ while (second.hasNext()) {
+ final Entry<Key,Value> entry = second.next();
+ assertEquals("Keys should be equal", first.get(entryIndex).getKey(), entry.getKey());
+ assertEquals("Values should be equal", first.get(entryIndex).getValue(), entry.getValue());
+ entryIndex++;
+ }
+ }
+
+ public static void insertList(final BatchWriter writer, final List<Entry<Key,Value>> list) throws MutationsRejectedException {
+ for (Entry<Key,Value> e : list) {
+ final Key key = e.getKey();
+ final Mutation mutation = new Mutation(key.getRow());
+ ColumnVisibility colVisibility = new ColumnVisibility(key.getColumnVisibility());
+ mutation.put(key.getColumnFamily(), key.getColumnQualifier(), colVisibility, key.getTimestamp(), e.getValue());
+ writer.addMutation(mutation);
+ }
+ }
+
+ private static class MRTester extends Configured implements Tool {
+ private static class TestMapper extends Mapper<Text,PeekingIterator<Entry<Key,Value>>,Key,Value> {
+ int count = 0;
+
+ @Override
+ protected void map(Text k, PeekingIterator<Entry<Key,Value>> v, Context context) throws IOException, InterruptedException {
+ try {
+ switch (count) {
+ case 0:
+ assertEquals("Current key should be " + ROW1, new Text(ROW1), k);
+ checkLists(row1, v);
+ break;
+ case 1:
+ assertEquals("Current key should be " + ROW2, new Text(ROW2), k);
+ checkLists(row2, v);
+ break;
+ case 2:
+ assertEquals("Current key should be " + ROW3, new Text(ROW3), k);
+ checkLists(row3, v);
+ break;
+ default:
+ assertTrue(false);
+ }
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ count++;
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ try {
+ assertEquals(3, count);
+ } catch (AssertionError e) {
+ e2 = e;
+ }
+ }
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 3) {
+ throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table>");
+ }
+
+ String user = args[0];
+ String pass = args[1];
+ String table = args[2];
+
+ @SuppressWarnings("deprecation")
+ Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormatClass(AccumuloRowInputFormat.class);
+
+ AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloInputFormat.setInputTableName(job, table);
+ AccumuloRowInputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setMapperClass(TestMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormatClass(NullOutputFormat.class);
+
+ job.setNumReduceTasks(0);
+
+ job.waitForCompletion(true);
+
+ return job.isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
+ }
+ }
+
+ @Test
+ public void test() throws Exception {
+ final MockInstance instance = new MockInstance(INSTANCE_NAME);
+ final Connector conn = instance.getConnector("root", new PasswordToken(""));
+ conn.tableOperations().create(TEST_TABLE_1);
+ BatchWriter writer = null;
+ try {
+ writer = conn.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
+ insertList(writer, row1);
+ insertList(writer, row2);
+ insertList(writer, row3);
+ } finally {
+ if (writer != null) {
+ writer.close();
+ }
+ }
+ MRTester.main(new String[] {"root", "", TEST_TABLE_1});
+ assertNull(e1);
+ assertNull(e2);
+ }
+}
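A mapper consuming AccumuloRowInputFormat in an application iterates each row the same way TestMapper does above. This sketch, which simply counts the cells in each row, is illustrative only and is not part of the patch:

    import java.io.IOException;
    import java.util.Map.Entry;

    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.util.PeekingIterator;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class RowSizeMapper extends Mapper<Text,PeekingIterator<Entry<Key,Value>>,Text,LongWritable> {
      @Override
      protected void map(Text row, PeekingIterator<Entry<Key,Value>> columns, Context context)
          throws IOException, InterruptedException {
        long cells = 0;
        while (columns.hasNext()) {   // the iterator yields every column of the current row, in sorted order
          columns.next();
          cells++;
        }
        context.write(row, new LongWritable(cells));
      }
    }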
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/BadPasswordSplitsAccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/BadPasswordSplitsAccumuloInputFormat.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/BadPasswordSplitsAccumuloInputFormat.java
new file mode 100644
index 0000000..fce7781
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapreduce/BadPasswordSplitsAccumuloInputFormat.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+
+/**
+ * AccumuloInputFormat which sets an incorrect PasswordToken on each RangeInputSplit it returns
+ */
+public class BadPasswordSplitsAccumuloInputFormat extends AccumuloInputFormat {
+
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException {
+ List<InputSplit> splits = super.getSplits(context);
+
+ for (InputSplit split : splits) {
+ org.apache.accumulo.core.client.mapreduce.RangeInputSplit rangeSplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) split;
+ rangeSplit.setToken(new PasswordToken("anythingelse"));
+ }
+
+ return splits;
+ }
+}
[04/12] ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
new file mode 100644
index 0000000..ce84209
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce.lib.impl;
+
+import java.util.Arrays;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.conf.ConfigurationCopy;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * @since 1.6.0
+ */
+public class FileOutputConfigurator extends ConfiguratorBase {
+
+ /**
+ * Configuration keys for {@link AccumuloConfiguration}.
+ *
+ * @since 1.6.0
+ */
+ public static enum Opts {
+ ACCUMULO_PROPERTIES;
+ }
+
+ /**
+ * The supported Accumulo properties we set in this OutputFormat, which change the behavior of the RecordWriter.<br />
+ * These properties correspond to the supported public static setter methods available to this class.
+ *
+ * @param property
+ * the Accumulo property to check
+ * @since 1.6.0
+ */
+ protected static Boolean isSupportedAccumuloProperty(Property property) {
+ switch (property) {
+ case TABLE_FILE_COMPRESSION_TYPE:
+ case TABLE_FILE_COMPRESSED_BLOCK_SIZE:
+ case TABLE_FILE_BLOCK_SIZE:
+ case TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX:
+ case TABLE_FILE_REPLICATION:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /**
+ * Helper for transforming Accumulo configuration properties into something that can be stored safely inside the Hadoop Job configuration.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param property
+ * the supported Accumulo property
+ * @param value
+ * the value of the property to set
+ * @since 1.6.0
+ */
+ private static <T> void setAccumuloProperty(Class<?> implementingClass, Configuration conf, Property property, T value) {
+ if (isSupportedAccumuloProperty(property)) {
+ String val = String.valueOf(value);
+ if (property.getType().isValidFormat(val))
+ conf.set(enumToConfKey(implementingClass, Opts.ACCUMULO_PROPERTIES) + "." + property.getKey(), val);
+ else
+ throw new IllegalArgumentException("Value is not appropriate for property type '" + property.getType() + "'");
+ } else
+ throw new IllegalArgumentException("Unsupported configuration property " + property.getKey());
+ }
+
+ /**
+ * This helper method provides an AccumuloConfiguration object constructed from the Accumulo defaults, and overridden with Accumulo properties that have been
+ * stored in the Job's configuration.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @since 1.6.0
+ */
+ public static AccumuloConfiguration getAccumuloConfiguration(Class<?> implementingClass, Configuration conf) {
+ String prefix = enumToConfKey(implementingClass, Opts.ACCUMULO_PROPERTIES) + ".";
+ ConfigurationCopy acuConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
+ for (Entry<String,String> entry : conf)
+ if (entry.getKey().startsWith(prefix))
+ acuConf.set(Property.getPropertyByKey(entry.getKey().substring(prefix.length())), entry.getValue());
+ return acuConf;
+ }
+
+ /**
+ * Sets the compression type to use for data blocks. Specifying a compression type may require additional libraries to be available to your Job.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param compressionType
+ * one of "none", "gz", "lzo", or "snappy"
+ * @since 1.6.0
+ */
+ public static void setCompressionType(Class<?> implementingClass, Configuration conf, String compressionType) {
+ if (compressionType == null || !Arrays.asList("none", "gz", "lzo", "snappy").contains(compressionType))
+ throw new IllegalArgumentException("Compression type must be one of: none, gz, lzo, snappy");
+ setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_COMPRESSION_TYPE, compressionType);
+ }
+
+ /**
+ * Sets the size for data blocks within each file.<br />
+ * Data blocks are a span of key/value pairs stored in the file that are compressed and indexed as a group.
+ *
+ * <p>
+ * Making this value smaller may increase seek performance, but at the cost of increasing the size of the indexes (which can also affect seek performance).
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param dataBlockSize
+ * the block size, in bytes
+ * @since 1.6.0
+ */
+ public static void setDataBlockSize(Class<?> implementingClass, Configuration conf, long dataBlockSize) {
+ setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE, dataBlockSize);
+ }
+
+ /**
+ * Sets the size for file blocks in the file system; file blocks are managed, and replicated, by the underlying file system.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param fileBlockSize
+ * the block size, in bytes
+ * @since 1.6.0
+ */
+ public static void setFileBlockSize(Class<?> implementingClass, Configuration conf, long fileBlockSize) {
+ setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_BLOCK_SIZE, fileBlockSize);
+ }
+
+ /**
+ * Sets the size for index blocks within each file; smaller blocks mean a deeper index hierarchy within the file, while larger blocks mean a shallower
+ * index hierarchy within the file. This can affect the performance of queries.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param indexBlockSize
+ * the block size, in bytes
+ * @since 1.6.0
+ */
+ public static void setIndexBlockSize(Class<?> implementingClass, Configuration conf, long indexBlockSize) {
+ setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX, indexBlockSize);
+ }
+
+ /**
+ * Sets the file system replication factor for the resulting file, overriding the file system default.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param replication
+ * the number of replicas for produced files
+ * @since 1.6.0
+ */
+ public static void setReplication(Class<?> implementingClass, Configuration conf, int replication) {
+ setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_REPLICATION, replication);
+ }
+
+}
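To make the prefix-keyed storage described in getAccumuloConfiguration concrete, the sketch below shows roughly how these helpers round-trip properties through a plain Hadoop Configuration; the implementing class and values are arbitrary choices for illustration:

    import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
    import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
    import org.apache.accumulo.core.conf.AccumuloConfiguration;
    import org.apache.accumulo.core.conf.Property;
    import org.apache.hadoop.conf.Configuration;

    public class ConfiguratorSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        // Each supported Property is stored under a key prefixed with the implementing class name.
        FileOutputConfigurator.setCompressionType(AccumuloFileOutputFormat.class, conf, "gz");
        FileOutputConfigurator.setReplication(AccumuloFileOutputFormat.class, conf, 2);
        // Reading back: Accumulo defaults, overridden by whatever was stored above.
        AccumuloConfiguration acuConf = FileOutputConfigurator.getAccumuloConfiguration(AccumuloFileOutputFormat.class, conf);
        System.out.println(acuConf.get(Property.TABLE_FILE_COMPRESSION_TYPE));   // gz
        System.out.println(acuConf.getCount(Property.TABLE_FILE_REPLICATION));   // 2
      }
    }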
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
new file mode 100644
index 0000000..7657c3c
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
@@ -0,0 +1,796 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce.lib.impl;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.StringTokenizer;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.ClientSideIteratorScanner;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.IsolatedScanner;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.RowIterator;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.impl.Tables;
+import org.apache.accumulo.core.client.impl.TabletLocator;
+import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
+import org.apache.accumulo.core.client.mock.MockTabletLocator;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.KeyExtent;
+import org.apache.accumulo.core.data.PartialKey;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.master.state.tables.TableState;
+import org.apache.accumulo.core.metadata.MetadataTable;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.security.TablePermission;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.accumulo.core.util.TextUtil;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.StringUtils;
+
+import com.google.common.collect.Maps;
+
+/**
+ * @since 1.6.0
+ */
+public class InputConfigurator extends ConfiguratorBase {
+
+ /**
+ * Configuration keys for {@link Scanner}.
+ *
+ * @since 1.6.0
+ */
+ public static enum ScanOpts {
+ TABLE_NAME, AUTHORIZATIONS, RANGES, COLUMNS, ITERATORS, TABLE_CONFIGS
+ }
+
+ /**
+ * Configuration keys for various features.
+ *
+ * @since 1.6.0
+ */
+ public static enum Features {
+ AUTO_ADJUST_RANGES, SCAN_ISOLATION, USE_LOCAL_ITERATORS, SCAN_OFFLINE
+ }
+
+ /**
+ * Sets the name of the input table over which this job will scan.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param tableName
+ * the name of the input table to read during the job
+ * @since 1.6.0
+ */
+ public static void setInputTableName(Class<?> implementingClass, Configuration conf, String tableName) {
+ checkArgument(tableName != null, "tableName is null");
+ conf.set(enumToConfKey(implementingClass, ScanOpts.TABLE_NAME), tableName);
+ }
+
+ /**
+ * Gets the name of the input table over which this job will scan.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return the name of the input table
+ * @since 1.6.0
+ */
+ public static String getInputTableName(Class<?> implementingClass, Configuration conf) {
+ return conf.get(enumToConfKey(implementingClass, ScanOpts.TABLE_NAME));
+ }
+
+ /**
+ * Sets the {@link Authorizations} used to scan. Must be a subset of the user's authorizations. Defaults to the empty set.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param auths
+ * the user's authorizations
+ * @since 1.6.0
+ */
+ public static void setScanAuthorizations(Class<?> implementingClass, Configuration conf, Authorizations auths) {
+ if (auths != null && !auths.isEmpty())
+ conf.set(enumToConfKey(implementingClass, ScanOpts.AUTHORIZATIONS), auths.serialize());
+ }
+
+ /**
+ * Gets the authorizations to set for the scans from the configuration.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return the Accumulo scan authorizations
+ * @since 1.6.0
+ * @see #setScanAuthorizations(Class, Configuration, Authorizations)
+ */
+ public static Authorizations getScanAuthorizations(Class<?> implementingClass, Configuration conf) {
+ String authString = conf.get(enumToConfKey(implementingClass, ScanOpts.AUTHORIZATIONS));
+ return authString == null ? Authorizations.EMPTY : new Authorizations(authString.getBytes(StandardCharsets.UTF_8));
+ }
+
+ /**
+ * Sets the input ranges to scan on all input tables for this job. If not set, the entire table will be scanned.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param ranges
+ * the ranges that will be mapped over
+ * @throws IllegalArgumentException
+ * if the ranges cannot be encoded into base 64
+ * @since 1.6.0
+ */
+ public static void setRanges(Class<?> implementingClass, Configuration conf, Collection<Range> ranges) {
+ checkArgument(ranges != null, "ranges is null");
+
+ ArrayList<String> rangeStrings = new ArrayList<String>(ranges.size());
+ try {
+ for (Range r : ranges) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ r.write(new DataOutputStream(baos));
+ rangeStrings.add(new String(Base64.encodeBase64(baos.toByteArray())));
+ }
+ conf.setStrings(enumToConfKey(implementingClass, ScanOpts.RANGES), rangeStrings.toArray(new String[0]));
+ } catch (IOException ex) {
+ throw new IllegalArgumentException("Unable to encode ranges to Base64", ex);
+ }
+ }
+
+ /**
+ * Gets the ranges to scan over from a job.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return the ranges
+ * @throws IOException
+ * if the ranges have been encoded improperly
+ * @since 1.6.0
+ * @see #setRanges(Class, Configuration, Collection)
+ */
+ public static List<Range> getRanges(Class<?> implementingClass, Configuration conf) throws IOException {
+
+ Collection<String> encodedRanges = conf.getStringCollection(enumToConfKey(implementingClass, ScanOpts.RANGES));
+ List<Range> ranges = new ArrayList<Range>();
+ for (String rangeString : encodedRanges) {
+ ByteArrayInputStream bais = new ByteArrayInputStream(Base64.decodeBase64(rangeString.getBytes()));
+ Range range = new Range();
+ range.readFields(new DataInputStream(bais));
+ ranges.add(range);
+ }
+ return ranges;
+ }
+
+ /**
+ * Gets a list of the iterator settings (for iterators to apply to a scanner) from this configuration.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return a list of iterators
+ * @since 1.6.0
+ * @see #addIterator(Class, Configuration, IteratorSetting)
+ */
+ public static List<IteratorSetting> getIterators(Class<?> implementingClass, Configuration conf) {
+ String iterators = conf.get(enumToConfKey(implementingClass, ScanOpts.ITERATORS));
+
+ // If no iterators are present, return an empty list
+ if (iterators == null || iterators.isEmpty())
+ return new ArrayList<IteratorSetting>();
+
+ // Compose the set of iterators encoded in the job configuration
+ StringTokenizer tokens = new StringTokenizer(iterators, StringUtils.COMMA_STR);
+ List<IteratorSetting> list = new ArrayList<IteratorSetting>();
+ try {
+ while (tokens.hasMoreTokens()) {
+ String itstring = tokens.nextToken();
+ ByteArrayInputStream bais = new ByteArrayInputStream(Base64.decodeBase64(itstring.getBytes()));
+ list.add(new IteratorSetting(new DataInputStream(bais)));
+ bais.close();
+ }
+ } catch (IOException e) {
+ throw new IllegalArgumentException("couldn't decode iterator settings");
+ }
+ return list;
+ }
+
+ /**
+ * Restricts the columns that will be mapped over for the single input table on this job.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param columnFamilyColumnQualifierPairs
+ * a pair of {@link Text} objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is
+ * selected. An empty set is the default and is equivalent to scanning all columns.
+ * @throws IllegalArgumentException
+ * if the column family is null
+ * @since 1.6.0
+ */
+ public static void fetchColumns(Class<?> implementingClass, Configuration conf, Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
+ checkArgument(columnFamilyColumnQualifierPairs != null, "columnFamilyColumnQualifierPairs is null");
+ String[] columnStrings = serializeColumns(columnFamilyColumnQualifierPairs);
+ conf.setStrings(enumToConfKey(implementingClass, ScanOpts.COLUMNS), columnStrings);
+ }
+
+ public static String[] serializeColumns(Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
+ checkArgument(columnFamilyColumnQualifierPairs != null, "columnFamilyColumnQualifierPairs is null");
+ ArrayList<String> columnStrings = new ArrayList<String>(columnFamilyColumnQualifierPairs.size());
+ for (Pair<Text,Text> column : columnFamilyColumnQualifierPairs) {
+
+ if (column.getFirst() == null)
+ throw new IllegalArgumentException("Column family can not be null");
+
+ String col = new String(Base64.encodeBase64(TextUtil.getBytes(column.getFirst())), StandardCharsets.UTF_8);
+ if (column.getSecond() != null)
+ col += ":" + new String(Base64.encodeBase64(TextUtil.getBytes(column.getSecond())), StandardCharsets.UTF_8);
+ columnStrings.add(col);
+ }
+
+ return columnStrings.toArray(new String[0]);
+ }
+
+ /**
+ * Gets the columns to be mapped over from this job.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return a set of columns
+ * @since 1.6.0
+ * @see #fetchColumns(Class, Configuration, Collection)
+ */
+ public static Set<Pair<Text,Text>> getFetchedColumns(Class<?> implementingClass, Configuration conf) {
+ checkArgument(conf != null, "conf is null");
+ String confValue = conf.get(enumToConfKey(implementingClass, ScanOpts.COLUMNS));
+ List<String> serialized = new ArrayList<String>();
+ if (confValue != null) {
+ // Split and include any trailing empty strings to allow empty column families
+ for (String val : confValue.split(",", -1)) {
+ serialized.add(val);
+ }
+ }
+ return deserializeFetchedColumns(serialized);
+ }
+
+ public static Set<Pair<Text,Text>> deserializeFetchedColumns(Collection<String> serialized) {
+ Set<Pair<Text,Text>> columns = new HashSet<Pair<Text,Text>>();
+
+ if (null == serialized) {
+ return columns;
+ }
+
+ for (String col : serialized) {
+ int idx = col.indexOf(":");
+ Text cf = new Text(idx < 0 ? Base64.decodeBase64(col.getBytes(StandardCharsets.UTF_8)) : Base64.decodeBase64(col.substring(0, idx).getBytes(
+ StandardCharsets.UTF_8)));
+ Text cq = idx < 0 ? null : new Text(Base64.decodeBase64(col.substring(idx + 1).getBytes(StandardCharsets.UTF_8)));
+ columns.add(new Pair<Text,Text>(cf, cq));
+ }
+ return columns;
+ }
+
+ /**
+ * Encodes an iterator on the input for the single input table associated with this job.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param cfg
+ * the configuration of the iterator
+ * @throws IllegalArgumentException
+ * if the iterator can't be serialized into the configuration
+ * @since 1.6.0
+ */
+ public static void addIterator(Class<?> implementingClass, Configuration conf, IteratorSetting cfg) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ String newIter;
+ try {
+ cfg.write(new DataOutputStream(baos));
+ newIter = new String(Base64.encodeBase64(baos.toByteArray()), StandardCharsets.UTF_8);
+ baos.close();
+ } catch (IOException e) {
+ throw new IllegalArgumentException("unable to serialize IteratorSetting");
+ }
+
+ String confKey = enumToConfKey(implementingClass, ScanOpts.ITERATORS);
+ String iterators = conf.get(confKey);
+ // No iterators specified yet, create a new string
+ if (iterators == null || iterators.isEmpty()) {
+ iterators = newIter;
+ } else {
+ // append the next iterator & reset
+ iterators = iterators.concat(StringUtils.COMMA_STR + newIter);
+ }
+ // Store the iterators w/ the job
+ conf.set(confKey, iterators);
+ }
+
+ /**
+ * Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
+ * Disabling this feature will cause exactly one Map task to be created for each specified range.
+ *
+ * <p>
+ * By default, this feature is <b>enabled</b>.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @see #setRanges(Class, Configuration, Collection)
+ * @since 1.6.0
+ */
+ public static void setAutoAdjustRanges(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
+ conf.setBoolean(enumToConfKey(implementingClass, Features.AUTO_ADJUST_RANGES), enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration has auto-adjust ranges enabled.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return false if the feature is disabled, true otherwise
+ * @since 1.6.0
+ * @see #setAutoAdjustRanges(Class, Configuration, boolean)
+ */
+ public static Boolean getAutoAdjustRanges(Class<?> implementingClass, Configuration conf) {
+ return conf.getBoolean(enumToConfKey(implementingClass, Features.AUTO_ADJUST_RANGES), true);
+ }
+
+ /**
+ * Controls the use of the {@link IsolatedScanner} in this job.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.6.0
+ */
+ public static void setScanIsolation(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
+ conf.setBoolean(enumToConfKey(implementingClass, Features.SCAN_ISOLATION), enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration has isolation enabled.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.6.0
+ * @see #setScanIsolation(Class, Configuration, boolean)
+ */
+ public static Boolean isIsolated(Class<?> implementingClass, Configuration conf) {
+ return conf.getBoolean(enumToConfKey(implementingClass, Features.SCAN_ISOLATION), false);
+ }
+
+ /**
+ * Controls the use of the {@link ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack to be constructed within the Map
+ * task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be available on the classpath for the task.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.6.0
+ */
+ public static void setLocalIterators(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
+ conf.setBoolean(enumToConfKey(implementingClass, Features.USE_LOCAL_ITERATORS), enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration uses local iterators.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.6.0
+ * @see #setLocalIterators(Class, Configuration, boolean)
+ */
+ public static Boolean usesLocalIterators(Class<?> implementingClass, Configuration conf) {
+ return conf.getBoolean(enumToConfKey(implementingClass, Features.USE_LOCAL_ITERATORS), false);
+ }
+
+ /**
+ * <p>
+ * Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
+ * table's files. If the table is not offline, then the job will fail. If the table comes online during the map reduce job, it is likely that the job will
+ * fail.
+ *
+ * <p>
+ * To use this option, the map reduce user will need access to read the Accumulo directory in HDFS.
+ *
+ * <p>
+ * Reading the offline table will create the scan time iterator stack in the map process. So any iterators that are configured for the table will need to be
+ * on the mapper's classpath.
+ *
+ * <p>
+ * One way to use this feature is to clone a table, take the clone offline, and use the clone as the input table for a map reduce job. If you plan to map
+ * reduce over the data many times, it may be better to compact the table, clone it, take it offline, and use the clone for all map reduce jobs. The
+ * reason to do this is that compaction will reduce each tablet in the table to one file, and it is faster to read from one file.
+ *
+ * <p>
+ * There are two possible advantages to reading a table's files directly out of HDFS. First, you may see better read performance. Second, it will support
+ * speculative execution better. When reading an online table, speculative execution can put more load on an already slow tablet server.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.6.0
+ */
+ public static void setOfflineTableScan(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
+ conf.setBoolean(enumToConfKey(implementingClass, Features.SCAN_OFFLINE), enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration has the offline table scan feature enabled.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.6.0
+ * @see #setOfflineTableScan(Class, Configuration, boolean)
+ */
+ public static Boolean isOfflineScan(Class<?> implementingClass, Configuration conf) {
+ return conf.getBoolean(enumToConfKey(implementingClass, Features.SCAN_OFFLINE), false);
+ }
+
+ /**
+ * Sets configurations for multiple tables at a time.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param configs
+ * an array of {@link InputTableConfig} objects to associate with the job
+ * @since 1.6.0
+ */
+ public static void setInputTableConfigs(Class<?> implementingClass, Configuration conf, Map<String,InputTableConfig> configs) {
+ MapWritable mapWritable = new MapWritable();
+ for (Map.Entry<String,InputTableConfig> tableConfig : configs.entrySet())
+ mapWritable.put(new Text(tableConfig.getKey()), tableConfig.getValue());
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try {
+ mapWritable.write(new DataOutputStream(baos));
+ } catch (IOException e) {
+ throw new IllegalStateException("Table configuration could not be serialized.");
+ }
+
+ String confKey = enumToConfKey(implementingClass, ScanOpts.TABLE_CONFIGS);
+ conf.set(confKey, new String(Base64.encodeBase64(baos.toByteArray())));
+ }
+
+ /**
+ * Returns all {@link InputTableConfig} objects associated with this job.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return all of the table query configs for the job
+ * @since 1.6.0
+ */
+ public static Map<String,InputTableConfig> getInputTableConfigs(Class<?> implementingClass, Configuration conf) {
+ Map<String,InputTableConfig> configs = new HashMap<String,InputTableConfig>();
+ Map.Entry<String,InputTableConfig> defaultConfig = getDefaultInputTableConfig(implementingClass, conf);
+ if (defaultConfig != null)
+ configs.put(defaultConfig.getKey(), defaultConfig.getValue());
+ String configString = conf.get(enumToConfKey(implementingClass, ScanOpts.TABLE_CONFIGS));
+ MapWritable mapWritable = new MapWritable();
+ if (configString != null) {
+ try {
+ byte[] bytes = Base64.decodeBase64(configString.getBytes());
+ ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
+ mapWritable.readFields(new DataInputStream(bais));
+ bais.close();
+ } catch (IOException e) {
+ throw new IllegalStateException("The table query configurations could not be deserialized from the given configuration");
+ }
+ }
+ for (Map.Entry<Writable,Writable> entry : mapWritable.entrySet())
+ configs.put(((Text) entry.getKey()).toString(), (InputTableConfig) entry.getValue());
+
+ return configs;
+ }
+
+ /**
+ * Returns the {@link InputTableConfig} for the given table
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param tableName
+ * the table name for which to fetch the table query config
+ * @return the table query config for the given table name (if it exists), or null if it does not
+ * @since 1.6.0
+ */
+ public static InputTableConfig getInputTableConfig(Class<?> implementingClass, Configuration conf, String tableName) {
+ Map<String,InputTableConfig> queryConfigs = getInputTableConfigs(implementingClass, conf);
+ return queryConfigs.get(tableName);
+ }
+
+ /**
+ * Initializes an Accumulo {@link TabletLocator} based on the configuration.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param tableId
+ * The table id for which to initialize the {@link TabletLocator}
+ * @return an Accumulo tablet locator
+ * @throws TableNotFoundException
+ * if the table name set on the configuration doesn't exist
+ * @since 1.6.0
+ */
+ public static TabletLocator getTabletLocator(Class<?> implementingClass, Configuration conf, String tableId) throws TableNotFoundException {
+ String instanceType = conf.get(enumToConfKey(implementingClass, InstanceOpts.TYPE));
+ if ("MockInstance".equals(instanceType))
+ return new MockTabletLocator();
+ Instance instance = getInstance(implementingClass, conf);
+ return TabletLocator.getLocator(instance, new Text(tableId));
+ }
+
+ // InputFormat doesn't have the equivalent of OutputFormat's checkOutputSpecs(JobContext job)
+ /**
+ * Check whether a configuration is fully configured to be used with an Accumulo {@link org.apache.hadoop.mapreduce.InputFormat}.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @throws IOException
+ * if the context is improperly configured
+ * @since 1.6.0
+ */
+ public static void validateOptions(Class<?> implementingClass, Configuration conf) throws IOException {
+
+ Map<String,InputTableConfig> inputTableConfigs = getInputTableConfigs(implementingClass, conf);
+ if (!isConnectorInfoSet(implementingClass, conf))
+ throw new IOException("Input info has not been set.");
+ String instanceKey = conf.get(enumToConfKey(implementingClass, InstanceOpts.TYPE));
+ if (!"MockInstance".equals(instanceKey) && !"ZooKeeperInstance".equals(instanceKey))
+ throw new IOException("Instance info has not been set.");
+ // validate that we can connect as configured
+ try {
+ String principal = getPrincipal(implementingClass, conf);
+ AuthenticationToken token = getAuthenticationToken(implementingClass, conf);
+ Connector c = getInstance(implementingClass, conf).getConnector(principal, token);
+ if (!c.securityOperations().authenticateUser(principal, token))
+ throw new IOException("Unable to authenticate user");
+
+ if (getInputTableConfigs(implementingClass, conf).size() == 0)
+ throw new IOException("No table set.");
+
+ for (Map.Entry<String,InputTableConfig> tableConfig : inputTableConfigs.entrySet()) {
+ if (!c.securityOperations().hasTablePermission(getPrincipal(implementingClass, conf), tableConfig.getKey(), TablePermission.READ))
+ throw new IOException("Unable to access table");
+ }
+ for (Map.Entry<String,InputTableConfig> tableConfigEntry : inputTableConfigs.entrySet()) {
+ InputTableConfig tableConfig = tableConfigEntry.getValue();
+ if (!tableConfig.shouldUseLocalIterators()) {
+ if (tableConfig.getIterators() != null) {
+ for (IteratorSetting iter : tableConfig.getIterators()) {
+ if (!c.tableOperations().testClassLoad(tableConfigEntry.getKey(), iter.getIteratorClass(), SortedKeyValueIterator.class.getName()))
+ throw new AccumuloException("Servers are unable to load " + iter.getIteratorClass() + " as a " + SortedKeyValueIterator.class.getName());
+ }
+ }
+ }
+ }
+ } catch (AccumuloException e) {
+ throw new IOException(e);
+ } catch (AccumuloSecurityException e) {
+ throw new IOException(e);
+ } catch (TableNotFoundException e) {
+ throw new IOException(e);
+ }
+ }
+
+ /**
+ * Returns the {@link org.apache.accumulo.core.client.mapreduce.InputTableConfig} for the configuration based on the properties set using the single-table
+ * input methods.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop instance for which to retrieve the configuration
+ * @return the config object built from the single input table properties set on the job
+ * @since 1.6.0
+ */
+ protected static Map.Entry<String,InputTableConfig> getDefaultInputTableConfig(Class<?> implementingClass, Configuration conf) {
+ String tableName = getInputTableName(implementingClass, conf);
+ if (tableName != null) {
+ InputTableConfig queryConfig = new InputTableConfig();
+ List<IteratorSetting> itrs = getIterators(implementingClass, conf);
+ if (itrs != null)
+ queryConfig.setIterators(itrs);
+ Set<Pair<Text,Text>> columns = getFetchedColumns(implementingClass, conf);
+ if (columns != null)
+ queryConfig.fetchColumns(columns);
+ List<Range> ranges = null;
+ try {
+ ranges = getRanges(implementingClass, conf);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ if (ranges != null)
+ queryConfig.setRanges(ranges);
+
+ queryConfig.setAutoAdjustRanges(getAutoAdjustRanges(implementingClass, conf)).setUseIsolatedScanners(isIsolated(implementingClass, conf))
+ .setUseLocalIterators(usesLocalIterators(implementingClass, conf)).setOfflineScan(isOfflineScan(implementingClass, conf));
+ return Maps.immutableEntry(tableName, queryConfig);
+ }
+ return null;
+ }
+
+ public static Map<String,Map<KeyExtent,List<Range>>> binOffline(String tableId, List<Range> ranges, Instance instance, Connector conn)
+ throws AccumuloException, TableNotFoundException {
+ Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<String,Map<KeyExtent,List<Range>>>();
+
+ if (Tables.getTableState(instance, tableId) != TableState.OFFLINE) {
+ Tables.clearCache(instance);
+ if (Tables.getTableState(instance, tableId) != TableState.OFFLINE) {
+ throw new AccumuloException("Table is online tableId:" + tableId + " cannot scan table in offline mode ");
+ }
+ }
+
+ for (Range range : ranges) {
+ Text startRow;
+
+ if (range.getStartKey() != null)
+ startRow = range.getStartKey().getRow();
+ else
+ startRow = new Text();
+
+ Range metadataRange = new Range(new KeyExtent(new Text(tableId), startRow, null).getMetadataEntry(), true, null, false);
+ Scanner scanner = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
+ MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.fetch(scanner);
+ scanner.fetchColumnFamily(MetadataSchema.TabletsSection.LastLocationColumnFamily.NAME);
+ scanner.fetchColumnFamily(MetadataSchema.TabletsSection.CurrentLocationColumnFamily.NAME);
+ scanner.fetchColumnFamily(MetadataSchema.TabletsSection.FutureLocationColumnFamily.NAME);
+ scanner.setRange(metadataRange);
+
+ RowIterator rowIter = new RowIterator(scanner);
+ KeyExtent lastExtent = null;
+ while (rowIter.hasNext()) {
+ Iterator<Map.Entry<Key,Value>> row = rowIter.next();
+ String last = "";
+ KeyExtent extent = null;
+ String location = null;
+
+ while (row.hasNext()) {
+ Map.Entry<Key,Value> entry = row.next();
+ Key key = entry.getKey();
+
+ if (key.getColumnFamily().equals(MetadataSchema.TabletsSection.LastLocationColumnFamily.NAME)) {
+ last = entry.getValue().toString();
+ }
+
+ if (key.getColumnFamily().equals(MetadataSchema.TabletsSection.CurrentLocationColumnFamily.NAME)
+ || key.getColumnFamily().equals(MetadataSchema.TabletsSection.FutureLocationColumnFamily.NAME)) {
+ location = entry.getValue().toString();
+ }
+
+ if (MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.hasColumns(key)) {
+ extent = new KeyExtent(key.getRow(), entry.getValue());
+ }
+
+ }
+
+ if (location != null)
+ return null;
+
+ if (!extent.getTableId().toString().equals(tableId)) {
+ throw new AccumuloException("Saw unexpected table Id " + tableId + " " + extent);
+ }
+
+ if (lastExtent != null && !extent.isPreviousExtent(lastExtent)) {
+ throw new AccumuloException(" " + lastExtent + " is not previous extent " + extent);
+ }
+
+ Map<KeyExtent,List<Range>> tabletRanges = binnedRanges.get(last);
+ if (tabletRanges == null) {
+ tabletRanges = new HashMap<KeyExtent,List<Range>>();
+ binnedRanges.put(last, tabletRanges);
+ }
+
+ List<Range> rangeList = tabletRanges.get(extent);
+ if (rangeList == null) {
+ rangeList = new ArrayList<Range>();
+ tabletRanges.put(extent, rangeList);
+ }
+
+ rangeList.add(range);
+
+ if (extent.getEndRow() == null || range.afterEndKey(new Key(extent.getEndRow()).followingKey(PartialKey.ROW))) {
+ break;
+ }
+
+ lastExtent = extent;
+ }
+
+ }
+ return binnedRanges;
+ }
+}
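To make the intended call pattern concrete, here is a minimal, hypothetical sketch (not part of the patch) of how an InputFormat's static configurators might delegate to InputConfigurator to serialize scan options into a Hadoop Configuration, and later read them back when computing splits. The choice of AccumuloInputFormat.class as the prefix class and all table, range, and iterator values are assumptions made purely for illustration:

  import java.util.Collections;
  import java.util.List;

  import org.apache.accumulo.core.client.IteratorSetting;
  import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
  import org.apache.accumulo.core.data.Range;
  import org.apache.accumulo.core.security.Authorizations;
  import org.apache.hadoop.conf.Configuration;

  public class InputConfiguratorSketch {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      // The prefix class scopes the generated configuration keys; using
      // AccumuloInputFormat.class here is an assumption for illustration.
      Class<?> prefix = org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat.class;

      InputConfigurator.setInputTableName(prefix, conf, "mytable");
      InputConfigurator.setScanAuthorizations(prefix, conf, new Authorizations("public"));
      InputConfigurator.setRanges(prefix, conf, Collections.singleton(new Range("a", "m")));
      InputConfigurator.addIterator(prefix, conf,
          new IteratorSetting(50, "vers", "org.apache.accumulo.core.iterators.user.VersioningIterator"));
      InputConfigurator.setAutoAdjustRanges(prefix, conf, true);

      // A getSplits() or RecordReader implementation would read the same keys back:
      String table = InputConfigurator.getInputTableName(prefix, conf);
      List<Range> ranges = InputConfigurator.getRanges(prefix, conf);
      List<IteratorSetting> iterators = InputConfigurator.getIterators(prefix, conf);
      System.out.println(table + " " + ranges + " " + iterators);
    }
  }

Everything crosses the job boundary as Base64-encoded strings keyed by the prefix class, so the map-side code only needs the Configuration to reconstruct the scan options.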
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/OutputConfigurator.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/OutputConfigurator.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/OutputConfigurator.java
new file mode 100644
index 0000000..727971a
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/OutputConfigurator.java
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce.lib.impl;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * @since 1.6.0
+ */
+public class OutputConfigurator extends ConfiguratorBase {
+
+ /**
+ * Configuration keys for {@link BatchWriter}.
+ *
+ * @since 1.6.0
+ */
+ public static enum WriteOpts {
+ DEFAULT_TABLE_NAME, BATCH_WRITER_CONFIG
+ }
+
+ /**
+ * Configuration keys for various features.
+ *
+ * @since 1.6.0
+ */
+ public static enum Features {
+ CAN_CREATE_TABLES, SIMULATION_MODE
+ }
+
+ /**
+ * Sets the default table name to use if one emits a null in place of a table name for a given mutation. Table names may only contain alphanumeric
+ * characters and underscores.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param tableName
+ * the table to use when the tablename is null in the write call
+ * @since 1.6.0
+ */
+ public static void setDefaultTableName(Class<?> implementingClass, Configuration conf, String tableName) {
+ if (tableName != null)
+ conf.set(enumToConfKey(implementingClass, WriteOpts.DEFAULT_TABLE_NAME), tableName);
+ }
+
+ /**
+ * Gets the default table name from the configuration.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return the default table name
+ * @since 1.6.0
+ * @see #setDefaultTableName(Class, Configuration, String)
+ */
+ public static String getDefaultTableName(Class<?> implementingClass, Configuration conf) {
+ return conf.get(enumToConfKey(implementingClass, WriteOpts.DEFAULT_TABLE_NAME));
+ }
+
+ /**
+ * Sets the configuration for the job's {@link BatchWriter} instances. If not set, a new {@link BatchWriterConfig} with sensible built-in defaults is
+ * used. Setting the configuration multiple times overwrites any previous configuration.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param bwConfig
+ * the configuration for the {@link BatchWriter}
+ * @since 1.6.0
+ */
+ public static void setBatchWriterOptions(Class<?> implementingClass, Configuration conf, BatchWriterConfig bwConfig) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ String serialized;
+ try {
+ bwConfig.write(new DataOutputStream(baos));
+ serialized = new String(baos.toByteArray(), StandardCharsets.UTF_8);
+ baos.close();
+ } catch (IOException e) {
+ throw new IllegalArgumentException("unable to serialize " + BatchWriterConfig.class.getName());
+ }
+ conf.set(enumToConfKey(implementingClass, WriteOpts.BATCH_WRITER_CONFIG), serialized);
+ }
+
+ /**
+ * Gets the {@link BatchWriterConfig} settings.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return the configuration object
+ * @since 1.6.0
+ * @see #setBatchWriterOptions(Class, Configuration, BatchWriterConfig)
+ */
+ public static BatchWriterConfig getBatchWriterOptions(Class<?> implementingClass, Configuration conf) {
+ String serialized = conf.get(enumToConfKey(implementingClass, WriteOpts.BATCH_WRITER_CONFIG));
+ BatchWriterConfig bwConfig = new BatchWriterConfig();
+ if (serialized == null || serialized.isEmpty()) {
+ return bwConfig;
+ } else {
+ try {
+ ByteArrayInputStream bais = new ByteArrayInputStream(serialized.getBytes(StandardCharsets.UTF_8));
+ bwConfig.readFields(new DataInputStream(bais));
+ bais.close();
+ return bwConfig;
+ } catch (IOException e) {
+ throw new IllegalArgumentException("unable to serialize " + BatchWriterConfig.class.getName());
+ }
+ }
+ }
+
+ /**
+ * Sets the directive to create new tables as necessary. Table names may only contain alphanumeric characters and underscores.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.6.0
+ */
+ public static void setCreateTables(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
+ conf.setBoolean(enumToConfKey(implementingClass, Features.CAN_CREATE_TABLES), enableFeature);
+ }
+
+ /**
+ * Determines whether tables are permitted to be created as needed.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.6.0
+ * @see #setCreateTables(Class, Configuration, boolean)
+ */
+ public static Boolean canCreateTables(Class<?> implementingClass, Configuration conf) {
+ return conf.getBoolean(enumToConfKey(implementingClass, Features.CAN_CREATE_TABLES), false);
+ }
+
+ /**
+ * Sets the directive to use simulation mode for this job. In simulation mode, no output is produced. This is useful for testing.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.6.0
+ */
+ public static void setSimulationMode(Class<?> implementingClass, Configuration conf, boolean enableFeature) {
+ conf.setBoolean(enumToConfKey(implementingClass, Features.SIMULATION_MODE), enableFeature);
+ }
+
+ /**
+ * Determines whether simulation mode is enabled.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.6.0
+ * @see #setSimulationMode(Class, Configuration, boolean)
+ */
+ public static Boolean getSimulationMode(Class<?> implementingClass, Configuration conf) {
+ return conf.getBoolean(enumToConfKey(implementingClass, Features.SIMULATION_MODE), false);
+ }
+
+}
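As with the input side, these settings round-trip through the Configuration. A minimal, hypothetical sketch (not part of the patch) of serializing BatchWriter options and a default table name and recovering them later; the prefix class and values are assumptions for illustration:

  import org.apache.accumulo.core.client.BatchWriterConfig;
  import org.apache.accumulo.core.client.mapreduce.lib.impl.OutputConfigurator;
  import org.apache.hadoop.conf.Configuration;

  public class OutputConfiguratorSketch {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      // Prefix class is an assumption for illustration; AccumuloOutputFormat's
      // static configurators would normally pass their own class here.
      Class<?> prefix = org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat.class;

      BatchWriterConfig bwConfig = new BatchWriterConfig();
      bwConfig.setMaxMemory(10 * 1024 * 1024L); // 10 MB write buffer
      bwConfig.setMaxWriteThreads(4);

      OutputConfigurator.setDefaultTableName(prefix, conf, "output_table");
      OutputConfigurator.setBatchWriterOptions(prefix, conf, bwConfig);
      OutputConfigurator.setCreateTables(prefix, conf, true);

      // The RecordWriter side recovers the same settings from the Configuration:
      BatchWriterConfig recovered = OutputConfigurator.getBatchWriterOptions(prefix, conf);
      String defaultTable = OutputConfigurator.getDefaultTableName(prefix, conf);
      boolean createTables = OutputConfigurator.canCreateTables(prefix, conf);
      System.out.println(defaultTable + " " + createTables + " " + recovered.getMaxWriteThreads());
    }
  }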
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/package-info.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/package-info.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/package-info.java
new file mode 100644
index 0000000..243160d
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/package-info.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * This package exists to store common helpers for configuring MapReduce jobs in a single location. It contains static configurator methods, stored in classes
+ * separate from the things they configure (typically, {@link org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat}/
+ * {@link org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat} and related classes in compatible frameworks), rather than storing them in those
+ * InputFormats/OutputFormats, so as not to clutter their API with methods that don't match the conventions for that framework. These classes may be useful to
+ * input/output plugins for other frameworks, so they can reuse the same configuration options and/or serialize them into a
+ * {@link org.apache.hadoop.conf.Configuration} instance in a standard way.
+ *
+ * <p>
+ * These classes are not expected to change much (except when new features are added), but end users should not use them. They should use the static
+ * configurators on the {@link org.apache.hadoop.mapreduce.InputFormat} or {@link org.apache.hadoop.mapreduce.OutputFormat} they are configuring, which in turn
+ * may use these classes to implement their own static configurators. Once again, these classes are intended for internal use, but may be useful to developers
+ * of plugins for other frameworks that read/write to Accumulo.
+ *
+ * @since 1.6.0
+ */
+package org.apache.accumulo.core.client.mapreduce.lib.impl;
+
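The package documentation above describes the intended end-user path: configure jobs through the InputFormat/OutputFormat static methods, never through these impl classes directly. A hypothetical sketch of that pattern (not part of the patch); credentials, instance, ZooKeeper hosts, and table names are placeholders, and the two-argument setZooKeeperInstance overload shown here may be superseded by a ClientConfiguration-based overload depending on version:

  import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
  import org.apache.accumulo.core.client.security.tokens.PasswordToken;
  import org.apache.accumulo.core.security.Authorizations;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.Job;

  public class EndUserConfigSketch {
    public static Job configure() throws Exception {
      // On older Hadoop releases, new Job(conf) is the equivalent call.
      Job job = Job.getInstance(new Configuration());
      AccumuloInputFormat.setConnectorInfo(job, "user", new PasswordToken("secret"));
      AccumuloInputFormat.setZooKeeperInstance(job, "myinstance", "zkhost:2181");
      AccumuloInputFormat.setInputTableName(job, "mytable");
      AccumuloInputFormat.setScanAuthorizations(job, new Authorizations("public"));
      job.setInputFormatClass(AccumuloInputFormat.class);
      return job;
    }
  }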
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/KeyRangePartitioner.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/KeyRangePartitioner.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/KeyRangePartitioner.java
new file mode 100644
index 0000000..c59841d
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/KeyRangePartitioner.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce.lib.partition;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+/**
+ * Hadoop partitioner that uses ranges based on row keys, and optionally sub-bins based on hashing.
+ */
+public class KeyRangePartitioner extends Partitioner<Key,Writable> implements Configurable {
+ private RangePartitioner rp = new RangePartitioner();
+
+ @Override
+ public int getPartition(Key key, Writable value, int numPartitions) {
+ return rp.getPartition(key.getRow(), value, numPartitions);
+ }
+
+ @Override
+ public Configuration getConf() {
+ return rp.getConf();
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ rp.setConf(conf);
+ }
+
+ /**
+ * Sets the HDFS file name to use, containing a newline-separated list of Base64-encoded split points that represent ranges for partitioning.
+ */
+ public static void setSplitFile(Job job, String file) {
+ RangePartitioner.setSplitFile(job, file);
+ }
+
+ /**
+ * Sets the number of random sub-bins per range
+ */
+ public static void setNumSubBins(Job job, int num) {
+ RangePartitioner.setNumSubBins(job, num);
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitioner.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitioner.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitioner.java
new file mode 100644
index 0000000..1b7501c
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/partition/RangePartitioner.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce.lib.partition;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.URI;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Scanner;
+import java.util.TreeSet;
+
+import org.apache.accumulo.core.client.mapreduce.lib.impl.DistributedCacheHelper;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+/**
+ * Hadoop partitioner that uses ranges, and optionally sub-bins based on hashing.
+ */
+public class RangePartitioner extends Partitioner<Text,Writable> implements Configurable {
+ private static final String PREFIX = RangePartitioner.class.getName();
+ private static final String CUTFILE_KEY = PREFIX + ".cutFile";
+ private static final String NUM_SUBBINS = PREFIX + ".subBins";
+
+ private Configuration conf;
+
+ @Override
+ public int getPartition(Text key, Writable value, int numPartitions) {
+ try {
+ return findPartition(key, getCutPoints(), getNumSubBins());
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ int findPartition(Text key, Text[] array, int numSubBins) {
+ // find the bin for the range, and guarantee it is positive
+ int index = Arrays.binarySearch(array, key);
+ index = index < 0 ? (index + 1) * -1 : index;
+
+ // both conditions work with numSubBins == 1, but this check is to avoid
+ // hashing, when we don't need to, for speed
+ if (numSubBins < 2)
+ return index;
+ return (key.toString().hashCode() & Integer.MAX_VALUE) % numSubBins + index * numSubBins;
+ }
+
+ private int _numSubBins = 0;
+
+ private synchronized int getNumSubBins() {
+ if (_numSubBins < 1) {
+ // get number of sub-bins and guarantee it is positive
+ _numSubBins = Math.max(1, getConf().getInt(NUM_SUBBINS, 1));
+ }
+ return _numSubBins;
+ }
+
+ private Text cutPointArray[] = null;
+
+ private synchronized Text[] getCutPoints() throws IOException {
+ if (cutPointArray == null) {
+ String cutFileName = conf.get(CUTFILE_KEY);
+ Path[] cf = DistributedCacheHelper.getLocalCacheFiles(conf);
+
+ if (cf != null) {
+ for (Path path : cf) {
+ if (path.toUri().getPath().endsWith(cutFileName.substring(cutFileName.lastIndexOf('/')))) {
+ TreeSet<Text> cutPoints = new TreeSet<Text>();
+ Scanner in = new Scanner(new BufferedReader(new InputStreamReader(new FileInputStream(path.toString()), StandardCharsets.UTF_8)));
+ try {
+ while (in.hasNextLine())
+ cutPoints.add(new Text(Base64.decodeBase64(in.nextLine().getBytes(StandardCharsets.UTF_8))));
+ } finally {
+ in.close();
+ }
+ cutPointArray = cutPoints.toArray(new Text[cutPoints.size()]);
+ break;
+ }
+ }
+ }
+ if (cutPointArray == null)
+ throw new FileNotFoundException(cutFileName + " not found in distributed cache");
+ }
+ return cutPointArray;
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ }
+
+ /**
+ * Sets the HDFS file name to use, containing a newline-separated list of Base64-encoded split points that represent ranges for partitioning.
+ */
+ public static void setSplitFile(Job job, String file) {
+ URI uri = new Path(file).toUri();
+ DistributedCacheHelper.addCacheFile(uri, job.getConfiguration());
+ job.getConfiguration().set(CUTFILE_KEY, uri.getPath());
+ }
+
+ /**
+ * Sets the number of random sub-bins per range
+ */
+ public static void setNumSubBins(Job job, int num) {
+ job.getConfiguration().setInt(NUM_SUBBINS, num);
+ }
+}
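The split file described in the javadoc above is a newline-separated list of Base64-encoded split points kept in HDFS and shipped via the distributed cache. A hypothetical sketch (not part of the patch) of producing such a file from a table's current splits and wiring the partitioner into a job; the connector, table name, file path, and sub-bin count are placeholders:

  import java.io.BufferedWriter;
  import java.io.OutputStreamWriter;
  import java.nio.charset.StandardCharsets;

  import org.apache.accumulo.core.client.Connector;
  import org.apache.accumulo.core.client.mapreduce.lib.partition.RangePartitioner;
  import org.apache.accumulo.core.util.TextUtil;
  import org.apache.commons.codec.binary.Base64;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;

  public class SplitFileSketch {
    // Writes one Base64-encoded split point per line, the format the partitioner expects.
    public static void configurePartitioner(Connector conn, String table, String splitFile, Job job) throws Exception {
      FileSystem fs = FileSystem.get(job.getConfiguration());
      BufferedWriter out = new BufferedWriter(
          new OutputStreamWriter(fs.create(new Path(splitFile)), StandardCharsets.UTF_8));
      try {
        for (Text split : conn.tableOperations().listSplits(table)) {
          out.write(new String(Base64.encodeBase64(TextUtil.getBytes(split)), StandardCharsets.UTF_8));
          out.newLine();
        }
      } finally {
        out.close();
      }
      RangePartitioner.setSplitFile(job, splitFile);
      RangePartitioner.setNumSubBins(job, 2);
      job.setPartitionerClass(RangePartitioner.class);
    }
  }

With sub-bins greater than one, each range is further hashed into several reducers, which can help when a few ranges carry most of the data.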
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/resources/.gitignore
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/resources/.gitignore b/mapreduce/src/main/resources/.gitignore
new file mode 100644
index 0000000..e69de29
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java
new file mode 100644
index 0000000..aad544b
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.IdentityMapper;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Logger;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class AccumuloFileOutputFormatTest {
+ private static final String PREFIX = AccumuloFileOutputFormatTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
+ private static final String BAD_TABLE = PREFIX + "_mapred_bad_table";
+ private static final String TEST_TABLE = PREFIX + "_mapred_test_table";
+ private static final String EMPTY_TABLE = PREFIX + "_mapred_empty_table";
+
+ private static AssertionError e1 = null;
+ private static AssertionError e2 = null;
+
+ @Rule
+ public TemporaryFolder folder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
+ Connector c = mockInstance.getConnector("root", new PasswordToken(""));
+ c.tableOperations().create(EMPTY_TABLE);
+ c.tableOperations().create(TEST_TABLE);
+ c.tableOperations().create(BAD_TABLE);
+ BatchWriter bw = c.createBatchWriter(TEST_TABLE, new BatchWriterConfig());
+ Mutation m = new Mutation("Key");
+ m.put("", "", "");
+ bw.addMutation(m);
+ bw.close();
+ bw = c.createBatchWriter(BAD_TABLE, new BatchWriterConfig());
+ m = new Mutation("r1");
+ m.put("cf1", "cq1", "A&B");
+ m.put("cf1", "cq1", "A&B");
+ m.put("cf1", "cq2", "A&");
+ bw.addMutation(m);
+ bw.close();
+ }
+
+ @Test
+ public void testEmptyWrite() throws Exception {
+ handleWriteTests(false);
+ }
+
+ @Test
+ public void testRealWrite() throws Exception {
+ handleWriteTests(true);
+ }
+
+ private static class MRTester extends Configured implements Tool {
+ private static class BadKeyMapper implements Mapper<Key,Value,Key,Value> {
+
+ int index = 0;
+
+ @Override
+ public void map(Key key, Value value, OutputCollector<Key,Value> output, Reporter reporter) throws IOException {
+ try {
+ try {
+ output.collect(key, value);
+ if (index == 2)
+ fail();
+ } catch (Exception e) {
+ Logger.getLogger(this.getClass()).error(e, e);
+ assertEquals(2, index);
+ }
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ index++;
+ }
+
+ @Override
+ public void configure(JobConf job) {}
+
+ @Override
+ public void close() throws IOException {
+ try {
+ assertEquals(2, index);
+ } catch (AssertionError e) {
+ e2 = e;
+ }
+ }
+
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 4) {
+ throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table> <outputfile>");
+ }
+
+ String user = args[0];
+ String pass = args[1];
+ String table = args[2];
+
+ JobConf job = new JobConf(getConf());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormat(AccumuloInputFormat.class);
+
+ AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloInputFormat.setInputTableName(job, table);
+ AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
+ AccumuloFileOutputFormat.setOutputPath(job, new Path(args[3]));
+
+ job.setMapperClass(BAD_TABLE.equals(table) ? BadKeyMapper.class : IdentityMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormat(AccumuloFileOutputFormat.class);
+
+ job.setNumReduceTasks(0);
+
+ return JobClient.runJob(job).isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String[] args) throws Exception {
+ assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
+ }
+ }
+
+ public void handleWriteTests(boolean content) throws Exception {
+ File f = folder.newFile("handleWriteTests");
+ f.delete();
+ MRTester.main(new String[] {"root", "", content ? TEST_TABLE : EMPTY_TABLE, f.getAbsolutePath()});
+
+ assertTrue(f.exists());
+ File[] files = f.listFiles(new FileFilter() {
+ @Override
+ public boolean accept(File file) {
+ return file.getName().startsWith("part-m-");
+ }
+ });
+ if (content) {
+ assertEquals(1, files.length);
+ assertTrue(files[0].exists());
+ } else {
+ assertEquals(0, files.length);
+ }
+ }
+
+ @Test
+ public void writeBadVisibility() throws Exception {
+ File f = folder.newFile("writeBadVisibility");
+ f.delete();
+ MRTester.main(new String[] {"root", "", BAD_TABLE, f.getAbsolutePath()});
+ Logger.getLogger(this.getClass()).error(e1, e1);
+ assertNull(e1);
+ assertNull(e2);
+ }
+
+ @Test
+ public void validateConfiguration() throws IOException, InterruptedException {
+
+ int a = 7;
+ long b = 300l;
+ long c = 50l;
+ long d = 10l;
+ String e = "snappy";
+
+ JobConf job = new JobConf();
+ AccumuloFileOutputFormat.setReplication(job, a);
+ AccumuloFileOutputFormat.setFileBlockSize(job, b);
+ AccumuloFileOutputFormat.setDataBlockSize(job, c);
+ AccumuloFileOutputFormat.setIndexBlockSize(job, d);
+ AccumuloFileOutputFormat.setCompressionType(job, e);
+
+ AccumuloConfiguration acuconf = AccumuloFileOutputFormat.getAccumuloConfiguration(job);
+
+ assertEquals(7, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
+ assertEquals(300l, acuconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE));
+ assertEquals(50l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
+ assertEquals(10l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
+ assertEquals("snappy", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
+
+ a = 17;
+ b = 1300l;
+ c = 150l;
+ d = 110l;
+ e = "lzo";
+
+ job = new JobConf();
+ AccumuloFileOutputFormat.setReplication(job, a);
+ AccumuloFileOutputFormat.setFileBlockSize(job, b);
+ AccumuloFileOutputFormat.setDataBlockSize(job, c);
+ AccumuloFileOutputFormat.setIndexBlockSize(job, d);
+ AccumuloFileOutputFormat.setCompressionType(job, e);
+
+ acuconf = AccumuloFileOutputFormat.getAccumuloConfiguration(job);
+
+ assertEquals(17, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
+ assertEquals(1300l, acuconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE));
+ assertEquals(150l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
+ assertEquals(110l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
+ assertEquals("lzo", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
+
+ }
+}
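
For context, the settings exercised by validateConfiguration() above correspond one-to-one with the table file properties checked in its assertions. A minimal driver-side sketch, not part of this commit and using placeholder path, sizes, and codec, would wire the same mapred calls like this:

  import org.apache.accumulo.core.client.mapred.AccumuloFileOutputFormat;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.mapred.JobConf;

  public class FileOutputConfigSketch {
    public static void main(String[] args) {
      JobConf job = new JobConf();
      job.setOutputFormat(AccumuloFileOutputFormat.class);
      // Placeholder output directory for the generated RFiles.
      AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp/rfiles"));
      AccumuloFileOutputFormat.setReplication(job, 3);           // Property.TABLE_FILE_REPLICATION
      AccumuloFileOutputFormat.setFileBlockSize(job, 1 << 26);   // Property.TABLE_FILE_BLOCK_SIZE
      AccumuloFileOutputFormat.setDataBlockSize(job, 1 << 17);   // Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE
      AccumuloFileOutputFormat.setIndexBlockSize(job, 1 << 17);  // Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX
      AccumuloFileOutputFormat.setCompressionType(job, "gz");    // Property.TABLE_FILE_COMPRESSION_TYPE
    }
  }
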
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormatTest.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormatTest.java b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormatTest.java
new file mode 100644
index 0000000..13490e0
--- /dev/null
+++ b/mapreduce/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormatTest.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.RegExFilter;
+import org.apache.accumulo.core.iterators.user.WholeRowIterator;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class AccumuloInputFormatTest {
+
+ private static final String PREFIX = AccumuloInputFormatTest.class.getSimpleName();
+ private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
+ private static final String TEST_TABLE_1 = PREFIX + "_mapred_table_1";
+
+ private JobConf job;
+
+ @BeforeClass
+ public static void setupClass() {
+ System.setProperty("hadoop.tmp.dir", System.getProperty("user.dir") + "/target/hadoop-tmp");
+ }
+
+ @Before
+ public void createJob() {
+ job = new JobConf();
+ }
+
+ /**
+ * Check that the iterator configuration is getting stored in the Job conf correctly.
+ */
+ @Test
+ public void testSetIterator() throws IOException {
+ IteratorSetting is = new IteratorSetting(1, "WholeRow", "org.apache.accumulo.core.iterators.WholeRowIterator");
+ AccumuloInputFormat.addIterator(job, is);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ is.write(new DataOutputStream(baos));
+ String iterators = job.get("AccumuloInputFormat.ScanOpts.Iterators");
+ assertEquals(new String(Base64.encodeBase64(baos.toByteArray())), iterators);
+ }
+
+ @Test
+ public void testAddIterator() throws IOException {
+ AccumuloInputFormat.addIterator(job, new IteratorSetting(1, "WholeRow", WholeRowIterator.class));
+ AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
+ IteratorSetting iter = new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator");
+ iter.addOption("v1", "1");
+ iter.addOption("junk", "\0omg:!\\xyzzy");
+ AccumuloInputFormat.addIterator(job, iter);
+
+ List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
+
+ // Check the list size
+ assertTrue(list.size() == 3);
+
+ // Walk the list and make sure our settings are correct
+ IteratorSetting setting = list.get(0);
+ assertEquals(1, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.user.WholeRowIterator", setting.getIteratorClass());
+ assertEquals("WholeRow", setting.getName());
+ assertEquals(0, setting.getOptions().size());
+
+ setting = list.get(1);
+ assertEquals(2, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
+ assertEquals("Versions", setting.getName());
+ assertEquals(0, setting.getOptions().size());
+
+ setting = list.get(2);
+ assertEquals(3, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
+ assertEquals("Count", setting.getName());
+ assertEquals(2, setting.getOptions().size());
+ assertEquals("1", setting.getOptions().get("v1"));
+ assertEquals("\0omg:!\\xyzzy", setting.getOptions().get("junk"));
+ }
+
+ /**
+ * Test adding iterator options where the keys and values contain both the FIELD_SEPARATOR character (':') and ITERATOR_SEPARATOR (',') characters. There
+ * should be no exceptions thrown when trying to parse these types of option entries.
+ *
+ * This test makes sure that the raw values, as they appear in the Job, match what is expected.
+ */
+ @Test
+ public void testIteratorOptionEncoding() throws Throwable {
+ String key = "colon:delimited:key";
+ String value = "comma,delimited,value";
+ IteratorSetting someSetting = new IteratorSetting(1, "iterator", "Iterator.class");
+ someSetting.addOption(key, value);
+ AccumuloInputFormat.addIterator(job, someSetting);
+
+ List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
+ assertEquals(1, list.size());
+ assertEquals(1, list.get(0).getOptions().size());
+ assertEquals(list.get(0).getOptions().get(key), value);
+
+ someSetting.addOption(key + "2", value);
+ someSetting.setPriority(2);
+ someSetting.setName("it2");
+ AccumuloInputFormat.addIterator(job, someSetting);
+ list = AccumuloInputFormat.getIterators(job);
+ assertEquals(2, list.size());
+ assertEquals(1, list.get(0).getOptions().size());
+ assertEquals(list.get(0).getOptions().get(key), value);
+ assertEquals(2, list.get(1).getOptions().size());
+ assertEquals(list.get(1).getOptions().get(key), value);
+ assertEquals(list.get(1).getOptions().get(key + "2"), value);
+ }
+
+ /**
+ * Test getting iterator settings for multiple iterators set
+ */
+ @Test
+ public void testGetIteratorSettings() throws IOException {
+ AccumuloInputFormat.addIterator(job, new IteratorSetting(1, "WholeRow", "org.apache.accumulo.core.iterators.WholeRowIterator"));
+ AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
+ AccumuloInputFormat.addIterator(job, new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator"));
+
+ List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
+
+ // Check the list size
+ assertTrue(list.size() == 3);
+
+ // Walk the list and make sure our settings are correct
+ IteratorSetting setting = list.get(0);
+ assertEquals(1, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.WholeRowIterator", setting.getIteratorClass());
+ assertEquals("WholeRow", setting.getName());
+
+ setting = list.get(1);
+ assertEquals(2, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
+ assertEquals("Versions", setting.getName());
+
+ setting = list.get(2);
+ assertEquals(3, setting.getPriority());
+ assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
+ assertEquals("Count", setting.getName());
+
+ }
+
+ @Test
+ public void testSetRegex() throws IOException {
+ String regex = ">\"*%<>\'\\";
+
+ IteratorSetting is = new IteratorSetting(50, regex, RegExFilter.class);
+ RegExFilter.setRegexs(is, regex, null, null, null, false);
+ AccumuloInputFormat.addIterator(job, is);
+
+ assertTrue(regex.equals(AccumuloInputFormat.getIterators(job).get(0).getName()));
+ }
+
+ private static AssertionError e1 = null;
+ private static AssertionError e2 = null;
+
+ private static class MRTester extends Configured implements Tool {
+ private static class TestMapper implements Mapper<Key,Value,Key,Value> {
+ Key key = null;
+ int count = 0;
+
+ @Override
+ public void map(Key k, Value v, OutputCollector<Key,Value> output, Reporter reporter) throws IOException {
+ try {
+ if (key != null)
+ assertEquals(key.getRow().toString(), new String(v.get()));
+ assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
+ assertEquals(new String(v.get()), String.format("%09x", count));
+ } catch (AssertionError e) {
+ e1 = e;
+ }
+ key = new Key(k);
+ count++;
+ }
+
+ @Override
+ public void configure(JobConf job) {}
+
+ @Override
+ public void close() throws IOException {
+ try {
+ assertEquals(100, count);
+ } catch (AssertionError e) {
+ e2 = e;
+ }
+ }
+
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 3) {
+ throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table>");
+ }
+
+ String user = args[0];
+ String pass = args[1];
+ String table = args[2];
+
+ JobConf job = new JobConf(getConf());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormat(AccumuloInputFormat.class);
+
+ AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
+ AccumuloInputFormat.setInputTableName(job, table);
+ AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
+
+ job.setMapperClass(TestMapper.class);
+ job.setMapOutputKeyClass(Key.class);
+ job.setMapOutputValueClass(Value.class);
+ job.setOutputFormat(NullOutputFormat.class);
+
+ job.setNumReduceTasks(0);
+
+ return JobClient.runJob(job).isSuccessful() ? 0 : 1;
+ }
+
+ public static void main(String... args) throws Exception {
+ assertEquals(0, ToolRunner.run(new Configuration(), new MRTester(), args));
+ }
+ }
+
+ @Test
+ public void testMap() throws Exception {
+ MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
+ Connector c = mockInstance.getConnector("root", new PasswordToken(""));
+ c.tableOperations().create(TEST_TABLE_1);
+ BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
+ for (int i = 0; i < 100; i++) {
+ Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
+ m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
+ bw.addMutation(m);
+ }
+ bw.close();
+
+ MRTester.main("root", "", TEST_TABLE_1);
+ assertNull(e1);
+ assertNull(e2);
+ }
+}
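
The iterator tests above only verify what ends up serialized in the JobConf. A short, self-contained sketch, not part of this commit and using a placeholder table name and regex, shows how a RegExFilter would typically be attached to the mapred AccumuloInputFormat:

  import org.apache.accumulo.core.client.IteratorSetting;
  import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
  import org.apache.accumulo.core.iterators.user.RegExFilter;
  import org.apache.hadoop.mapred.JobConf;

  public class RegexScanSketch {
    public static void main(String[] args) {
      JobConf job = new JobConf();
      AccumuloInputFormat.setInputTableName(job, "exampleTable");    // placeholder table
      IteratorSetting is = new IteratorSetting(50, "rowFilter", RegExFilter.class);
      RegExFilter.setRegexs(is, "row.*", null, null, null, false);   // filter on the row regex only
      AccumuloInputFormat.addIterator(job, is);                      // serialized into the JobConf
    }
  }
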
[10/12] ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
deleted file mode 100644
index e58e350..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.accumulo.core.client.ClientSideIteratorScanner;
-import org.apache.accumulo.core.client.IsolatedScanner;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.client.impl.TabletLocator;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- * This abstract {@link InputFormat} class allows MapReduce jobs to use Accumulo as the source of K,V pairs.
- * <p>
- * Subclasses must implement a {@link #createRecordReader(InputSplit, TaskAttemptContext)} to provide a {@link RecordReader} for K,V.
- * <p>
- * A static base class, RecordReaderBase, is provided to retrieve Accumulo {@link Key}/{@link Value} pairs, but one must implement its
- * {@link RecordReaderBase#nextKeyValue()} to transform them to the desired generic types K,V.
- * <p>
- * See {@link AccumuloInputFormat} for an example implementation.
- */
-public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
-
- /**
- * Gets the table name from the configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the table name
- * @since 1.5.0
- * @see #setInputTableName(Job, String)
- */
- protected static String getInputTableName(JobContext context) {
- return InputConfigurator.getInputTableName(CLASS, getConfiguration(context));
- }
-
- /**
- * Sets the name of the input table, over which this job will scan.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param tableName
- * the name of the input table to read during this job
- * @since 1.5.0
- */
- public static void setInputTableName(Job job, String tableName) {
- InputConfigurator.setInputTableName(CLASS, job.getConfiguration(), tableName);
- }
-
- /**
- * Sets the input ranges to scan for the single input table associated with this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param ranges
- * the ranges that will be mapped over
- * @since 1.5.0
- */
- public static void setRanges(Job job, Collection<Range> ranges) {
- InputConfigurator.setRanges(CLASS, job.getConfiguration(), ranges);
- }
-
- /**
- * Gets the ranges to scan over from a job.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return the ranges
- * @since 1.5.0
- * @see #setRanges(Job, Collection)
- */
- protected static List<Range> getRanges(JobContext context) throws IOException {
- return InputConfigurator.getRanges(CLASS, getConfiguration(context));
- }
-
- /**
- * Restricts the columns that will be mapped over for this job for the default input table.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param columnFamilyColumnQualifierPairs
- * a pair of {@link Text} objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is
- * selected. An empty set is the default and is equivalent to scanning all columns.
- * @since 1.5.0
- */
- public static void fetchColumns(Job job, Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
- InputConfigurator.fetchColumns(CLASS, job.getConfiguration(), columnFamilyColumnQualifierPairs);
- }
-
- /**
- * Gets the columns to be mapped over from this job.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return a set of columns
- * @since 1.5.0
- * @see #fetchColumns(Job, Collection)
- */
- protected static Set<Pair<Text,Text>> getFetchedColumns(JobContext context) {
- return InputConfigurator.getFetchedColumns(CLASS, getConfiguration(context));
- }
-
- /**
- * Encode an iterator on the single input table for this job.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param cfg
- * the configuration of the iterator
- * @since 1.5.0
- */
- public static void addIterator(Job job, IteratorSetting cfg) {
- InputConfigurator.addIterator(CLASS, job.getConfiguration(), cfg);
- }
-
- /**
- * Gets a list of the iterator settings (for iterators to apply to a scanner) from this configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return a list of iterators
- * @since 1.5.0
- * @see #addIterator(Job, IteratorSetting)
- */
- protected static List<IteratorSetting> getIterators(JobContext context) {
- return InputConfigurator.getIterators(CLASS, getConfiguration(context));
- }
-
- /**
- * Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
- * Disabling this feature will cause exactly one Map task to be created for each specified range.
- *
- * <p>
- * By default, this feature is <b>enabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @see #setRanges(Job, Collection)
- * @since 1.5.0
- */
- public static void setAutoAdjustRanges(Job job, boolean enableFeature) {
- InputConfigurator.setAutoAdjustRanges(CLASS, job.getConfiguration(), enableFeature);
- }
-
- /**
- * Determines whether a configuration has auto-adjust ranges enabled.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return false if the feature is disabled, true otherwise
- * @since 1.5.0
- * @see #setAutoAdjustRanges(Job, boolean)
- */
- protected static boolean getAutoAdjustRanges(JobContext context) {
- return InputConfigurator.getAutoAdjustRanges(CLASS, getConfiguration(context));
- }
-
- /**
- * Controls the use of the {@link IsolatedScanner} in this job.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.5.0
- */
- public static void setScanIsolation(Job job, boolean enableFeature) {
- InputConfigurator.setScanIsolation(CLASS, job.getConfiguration(), enableFeature);
- }
-
- /**
- * Determines whether a configuration has isolation enabled.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return true if the feature is enabled, false otherwise
- * @since 1.5.0
- * @see #setScanIsolation(Job, boolean)
- */
- protected static boolean isIsolated(JobContext context) {
- return InputConfigurator.isIsolated(CLASS, getConfiguration(context));
- }
-
- /**
- * Controls the use of the {@link ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack to be constructed within the Map
- * task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be available on the classpath for the task.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.5.0
- */
- public static void setLocalIterators(Job job, boolean enableFeature) {
- InputConfigurator.setLocalIterators(CLASS, job.getConfiguration(), enableFeature);
- }
-
- /**
- * Determines whether a configuration uses local iterators.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return true if the feature is enabled, false otherwise
- * @since 1.5.0
- * @see #setLocalIterators(Job, boolean)
- */
- protected static boolean usesLocalIterators(JobContext context) {
- return InputConfigurator.usesLocalIterators(CLASS, getConfiguration(context));
- }
-
- /**
- * <p>
- * Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
- * table's files. If the table is not offline, then the job will fail. If the table comes online during the map reduce job, it is likely that the job will
- * fail.
- *
- * <p>
- * To use this option, the map reduce user will need access to read the Accumulo directory in HDFS.
- *
- * <p>
- * Reading the offline table will create the scan time iterator stack in the map process. So any iterators that are configured for the table will need to be
- * on the mapper's classpath.
- *
- * <p>
- * One way to use this feature is to clone a table, take the clone offline, and use the clone as the input table for a map reduce job. If you plan to map
- * reduce over the data many times, it may be better to compact the table, clone it, take it offline, and use the clone for all map reduce jobs. The
- * reason to do this is that compaction will reduce each tablet in the table to one file, and it is faster to read from one file.
- *
- * <p>
- * There are two possible advantages to reading a table's files directly out of HDFS. First, you may see better read performance. Second, it will support
- * speculative execution better. When reading an online table speculative execution can put more load on an already slow tablet server.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param job
- * the Hadoop job instance to be configured
- * @param enableFeature
- * the feature is enabled if true, disabled otherwise
- * @since 1.5.0
- */
- public static void setOfflineTableScan(Job job, boolean enableFeature) {
- InputConfigurator.setOfflineTableScan(CLASS, job.getConfiguration(), enableFeature);
- }
-
- /**
- * Determines whether a configuration has the offline table scan feature enabled.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return true if the feature is enabled, false otherwise
- * @since 1.5.0
- * @see #setOfflineTableScan(Job, boolean)
- */
- protected static boolean isOfflineScan(JobContext context) {
- return InputConfigurator.isOfflineScan(CLASS, getConfiguration(context));
- }
-
- /**
- * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
- *
- * @param context
- * the Hadoop context for the configured job
- * @return an Accumulo tablet locator
- * @throws org.apache.accumulo.core.client.TableNotFoundException
- * if the table name set on the configuration doesn't exist
- * @since 1.5.0
- * @deprecated since 1.6.0
- */
- @Deprecated
- protected static TabletLocator getTabletLocator(JobContext context) throws TableNotFoundException {
- return InputConfigurator.getTabletLocator(CLASS, getConfiguration(context), InputConfigurator.getInputTableName(CLASS, getConfiguration(context)));
- }
-
- protected abstract static class RecordReaderBase<K,V> extends AbstractRecordReader<K,V> {
-
- /**
- * Apply the configured iterators from the configuration to the scanner for the specified table name
- *
- * @param context
- * the Hadoop context for the configured job
- * @param scanner
- * the scanner to configure
- * @since 1.6.0
- */
- @Override
- protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName, org.apache.accumulo.core.client.mapreduce.RangeInputSplit split) {
- setupIterators(context, scanner, split);
- }
-
- /**
- * Apply the configured iterators from the configuration to the scanner.
- *
- * @param context
- * the Hadoop context for the configured job
- * @param scanner
- * the scanner to configure
- */
- @Deprecated
- protected void setupIterators(TaskAttemptContext context, Scanner scanner) {
- setupIterators(context, scanner, null);
- }
-
- /**
- * Initialize a scanner over the given input split using this task attempt configuration.
- */
- protected void setupIterators(TaskAttemptContext context, Scanner scanner, org.apache.accumulo.core.client.mapreduce.RangeInputSplit split) {
- List<IteratorSetting> iterators = null;
- if (null == split) {
- iterators = getIterators(context);
- } else {
- iterators = split.getIterators();
- if (null == iterators) {
- iterators = getIterators(context);
- }
- }
- for (IteratorSetting iterator : iterators)
- scanner.addScanIterator(iterator);
- }
- }
-
- /**
- * @deprecated since 1.5.2; Use {@link org.apache.accumulo.core.client.mapreduce.RangeInputSplit} instead.
- * @see org.apache.accumulo.core.client.mapreduce.RangeInputSplit
- */
- @Deprecated
- public static class RangeInputSplit extends org.apache.accumulo.core.client.mapreduce.RangeInputSplit {
-
- public RangeInputSplit() {
- super();
- }
-
- public RangeInputSplit(RangeInputSplit other) throws IOException {
- super(other);
- }
-
- protected RangeInputSplit(String table, Range range, String[] locations) {
- super(table, "", range, locations);
- }
-
- public RangeInputSplit(String table, String tableId, Range range, String[] locations) {
- super(table, tableId, range, locations);
- }
- }
-}
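
The setOfflineTableScan javadoc above describes the clone-then-offline workflow in prose. A rough sketch of that workflow against the mapreduce API, not part of this commit and using placeholder instance, credentials, and table names, might look like:

  import java.util.Collections;

  import org.apache.accumulo.core.client.ClientConfiguration;
  import org.apache.accumulo.core.client.Connector;
  import org.apache.accumulo.core.client.ZooKeeperInstance;
  import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
  import org.apache.accumulo.core.client.security.tokens.PasswordToken;
  import org.apache.hadoop.mapreduce.Job;

  public class OfflineScanSketch {
    public static void main(String[] args) throws Exception {
      ClientConfiguration cc = ClientConfiguration.loadDefault().withInstance("instance").withZkHosts("zk1:2181");
      Connector conn = new ZooKeeperInstance(cc).getConnector("user", new PasswordToken("pass"));
      // Clone the live table and take the clone offline so the job reads its files directly.
      conn.tableOperations().clone("liveTable", "liveTable_clone", true,
          Collections.<String,String> emptyMap(), Collections.<String> emptySet());
      conn.tableOperations().offline("liveTable_clone");

      Job job = Job.getInstance();  // Hadoop 2 style
      AccumuloInputFormat.setConnectorInfo(job, "user", new PasswordToken("pass"));
      AccumuloInputFormat.setZooKeeperInstance(job, cc);
      AccumuloInputFormat.setInputTableName(job, "liveTable_clone");
      AccumuloInputFormat.setOfflineTableScan(job, true);  // read the offline clone's files
    }
  }
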
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
deleted file mode 100644
index e59451e..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
-/**
- * This class holds a batch scan configuration for a table. It contains all the properties needed to specify how rows should be returned from the table.
- */
-public class InputTableConfig implements Writable {
-
- private List<IteratorSetting> iterators;
- private List<Range> ranges;
- private Collection<Pair<Text,Text>> columns;
-
- private boolean autoAdjustRanges = true;
- private boolean useLocalIterators = false;
- private boolean useIsolatedScanners = false;
- private boolean offlineScan = false;
-
- public InputTableConfig() {}
-
- /**
- * Creates a batch scan config object out of a previously serialized batch scan config object.
- *
- * @param input
- * the data input of the serialized batch scan config
- */
- public InputTableConfig(DataInput input) throws IOException {
- readFields(input);
- }
-
- /**
- * Sets the input ranges to scan for the table associated with this configuration.
- *
- * @param ranges
- * the ranges that will be mapped over
- * @since 1.6.0
- */
- public InputTableConfig setRanges(List<Range> ranges) {
- this.ranges = ranges;
- return this;
- }
-
- /**
- * Returns the ranges to be queried in the configuration
- */
- public List<Range> getRanges() {
- return ranges != null ? ranges : new ArrayList<Range>();
- }
-
- /**
- * Restricts the columns that will be mapped over for this job for the default input table.
- *
- * @param columns
- * a pair of {@link Text} objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is
- * selected. An empty set is the default and is equivalent to scanning all columns.
- * @since 1.6.0
- */
- public InputTableConfig fetchColumns(Collection<Pair<Text,Text>> columns) {
- this.columns = columns;
- return this;
- }
-
- /**
- * Returns the columns to be fetched for this configuration
- */
- public Collection<Pair<Text,Text>> getFetchedColumns() {
- return columns != null ? columns : new HashSet<Pair<Text,Text>>();
- }
-
- /**
- * Sets the iterators to be used in the query.
- *
- * @param iterators
- * the configurations for the iterators
- * @since 1.6.0
- */
- public InputTableConfig setIterators(List<IteratorSetting> iterators) {
- this.iterators = iterators;
- return this;
- }
-
- /**
- * Returns the iterators to be set on this configuration
- */
- public List<IteratorSetting> getIterators() {
- return iterators != null ? iterators : new ArrayList<IteratorSetting>();
- }
-
- /**
- * Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
- * Disabling this feature will cause exactly one Map task to be created for each specified range.
- *
- * <p>
- * By default, this feature is <b>enabled</b>.
- *
- * @param autoAdjustRanges
- * the feature is enabled if true, disabled otherwise
- * @see #setRanges(java.util.List)
- * @since 1.6.0
- */
- public InputTableConfig setAutoAdjustRanges(boolean autoAdjustRanges) {
- this.autoAdjustRanges = autoAdjustRanges;
- return this;
- }
-
- /**
- * Determines whether a configuration has auto-adjust ranges enabled.
- *
- * @return false if the feature is disabled, true otherwise
- * @since 1.6.0
- * @see #setAutoAdjustRanges(boolean)
- */
- public boolean shouldAutoAdjustRanges() {
- return autoAdjustRanges;
- }
-
- /**
- * Controls the use of the {@link org.apache.accumulo.core.client.ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack
- * to be constructed within the Map task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be
- * available on the classpath for the task.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param useLocalIterators
- * the feature is enabled if true, disabled otherwise
- * @since 1.6.0
- */
- public InputTableConfig setUseLocalIterators(boolean useLocalIterators) {
- this.useLocalIterators = useLocalIterators;
- return this;
- }
-
- /**
- * Determines whether a configuration uses local iterators.
- *
- * @return true if the feature is enabled, false otherwise
- * @since 1.6.0
- * @see #setUseLocalIterators(boolean)
- */
- public boolean shouldUseLocalIterators() {
- return useLocalIterators;
- }
-
- /**
- * <p>
- * Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
- * table's files. If the table is not offline, then the job will fail. If the table comes online during the map reduce job, it is likely that the job will
- * fail.
- *
- * <p>
- * To use this option, the map reduce user will need access to read the Accumulo directory in HDFS.
- *
- * <p>
- * Reading the offline table will create the scan time iterator stack in the map process. So any iterators that are configured for the table will need to be
- * on the mapper's classpath. The accumulo-site.xml may need to be on the mapper's classpath if HDFS or the Accumulo directory in HDFS are non-standard.
- *
- * <p>
- * One way to use this feature is to clone a table, take the clone offline, and use the clone as the input table for a map reduce job. If you plan to map
- * reduce over the data many times, it may be better to compact the table, clone it, take it offline, and use the clone for all map reduce jobs. The
- * reason to do this is that compaction will reduce each tablet in the table to one file, and it is faster to read from one file.
- *
- * <p>
- * There are two possible advantages to reading a table's files directly out of HDFS. First, you may see better read performance. Second, it will support
- * speculative execution better. When reading an online table speculative execution can put more load on an already slow tablet server.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param offlineScan
- * the feature is enabled if true, disabled otherwise
- * @since 1.6.0
- */
- public InputTableConfig setOfflineScan(boolean offlineScan) {
- this.offlineScan = offlineScan;
- return this;
- }
-
- /**
- * Determines whether a configuration has the offline table scan feature enabled.
- *
- * @return true if the feature is enabled, false otherwise
- * @since 1.6.0
- * @see #setOfflineScan(boolean)
- */
- public boolean isOfflineScan() {
- return offlineScan;
- }
-
- /**
- * Controls the use of the {@link org.apache.accumulo.core.client.IsolatedScanner} in this job.
- *
- * <p>
- * By default, this feature is <b>disabled</b>.
- *
- * @param useIsolatedScanners
- * the feature is enabled if true, disabled otherwise
- * @since 1.6.0
- */
- public InputTableConfig setUseIsolatedScanners(boolean useIsolatedScanners) {
- this.useIsolatedScanners = useIsolatedScanners;
- return this;
- }
-
- /**
- * Determines whether a configuration has isolation enabled.
- *
- * @return true if the feature is enabled, false otherwise
- * @since 1.6.0
- * @see #setUseIsolatedScanners(boolean)
- */
- public boolean shouldUseIsolatedScanners() {
- return useIsolatedScanners;
- }
-
- /**
- * Writes the state for the current object out to the specified {@link DataOutput}
- *
- * @param dataOutput
- * the output for which to write the object's state
- */
- @Override
- public void write(DataOutput dataOutput) throws IOException {
- if (iterators != null) {
- dataOutput.writeInt(iterators.size());
- for (IteratorSetting setting : iterators)
- setting.write(dataOutput);
- } else {
- dataOutput.writeInt(0);
- }
- if (ranges != null) {
- dataOutput.writeInt(ranges.size());
- for (Range range : ranges)
- range.write(dataOutput);
- } else {
- dataOutput.writeInt(0);
- }
- if (columns != null) {
- dataOutput.writeInt(columns.size());
- for (Pair<Text,Text> column : columns) {
- if (column.getSecond() == null) {
- dataOutput.writeInt(1);
- column.getFirst().write(dataOutput);
- } else {
- dataOutput.writeInt(2);
- column.getFirst().write(dataOutput);
- column.getSecond().write(dataOutput);
- }
- }
- } else {
- dataOutput.writeInt(0);
- }
- dataOutput.writeBoolean(autoAdjustRanges);
- dataOutput.writeBoolean(useLocalIterators);
- dataOutput.writeBoolean(useIsolatedScanners);
- }
-
- /**
- * Reads the fields in the {@link DataInput} into the current object
- *
- * @param dataInput
- * the input fields to read into the current object
- */
- @Override
- public void readFields(DataInput dataInput) throws IOException {
- // load iterators
- long iterSize = dataInput.readInt();
- if (iterSize > 0)
- iterators = new ArrayList<IteratorSetting>();
- for (int i = 0; i < iterSize; i++)
- iterators.add(new IteratorSetting(dataInput));
- // load ranges
- long rangeSize = dataInput.readInt();
- if (rangeSize > 0)
- ranges = new ArrayList<Range>();
- for (int i = 0; i < rangeSize; i++) {
- Range range = new Range();
- range.readFields(dataInput);
- ranges.add(range);
- }
- // load columns
- long columnSize = dataInput.readInt();
- if (columnSize > 0)
- columns = new HashSet<Pair<Text,Text>>();
- for (int i = 0; i < columnSize; i++) {
- long numPairs = dataInput.readInt();
- Text colFam = new Text();
- colFam.readFields(dataInput);
- if (numPairs == 1) {
- columns.add(new Pair<Text,Text>(colFam, null));
- } else if (numPairs == 2) {
- Text colQual = new Text();
- colQual.readFields(dataInput);
- columns.add(new Pair<Text,Text>(colFam, colQual));
- }
- }
- autoAdjustRanges = dataInput.readBoolean();
- useLocalIterators = dataInput.readBoolean();
- useIsolatedScanners = dataInput.readBoolean();
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o)
- return true;
- if (o == null || getClass() != o.getClass())
- return false;
-
- InputTableConfig that = (InputTableConfig) o;
-
- if (autoAdjustRanges != that.autoAdjustRanges)
- return false;
- if (offlineScan != that.offlineScan)
- return false;
- if (useIsolatedScanners != that.useIsolatedScanners)
- return false;
- if (useLocalIterators != that.useLocalIterators)
- return false;
- if (columns != null ? !columns.equals(that.columns) : that.columns != null)
- return false;
- if (iterators != null ? !iterators.equals(that.iterators) : that.iterators != null)
- return false;
- if (ranges != null ? !ranges.equals(that.ranges) : that.ranges != null)
- return false;
- return true;
- }
-
- @Override
- public int hashCode() {
- int result = 31 * (iterators != null ? iterators.hashCode() : 0);
- result = 31 * result + (ranges != null ? ranges.hashCode() : 0);
- result = 31 * result + (columns != null ? columns.hashCode() : 0);
- result = 31 * result + (autoAdjustRanges ? 1 : 0);
- result = 31 * result + (useLocalIterators ? 1 : 0);
- result = 31 * result + (useIsolatedScanners ? 1 : 0);
- result = 31 * result + (offlineScan ? 1 : 0);
- return result;
- }
-}
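
InputTableConfig is a fluent holder for per-table scan settings; each setter above returns this so the options chain. A brief sketch, not part of this commit, follows; the table name and range are placeholders, and the AccumuloMultiTableInputFormat hookup at the end is an assumption about the 1.6 multi-table input format:

  import java.util.Collections;
  import java.util.HashMap;
  import java.util.Map;

  import org.apache.accumulo.core.client.IteratorSetting;
  import org.apache.accumulo.core.client.mapreduce.AccumuloMultiTableInputFormat;
  import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
  import org.apache.accumulo.core.data.Range;
  import org.apache.accumulo.core.iterators.user.WholeRowIterator;
  import org.apache.hadoop.mapreduce.Job;

  public class InputTableConfigSketch {
    public static void main(String[] args) throws Exception {
      InputTableConfig cfg = new InputTableConfig()
          .setRanges(Collections.singletonList(new Range("a", "m")))  // placeholder row range
          .setIterators(Collections.singletonList(new IteratorSetting(50, "wholeRow", WholeRowIterator.class)))
          .setAutoAdjustRanges(true)
          .setUseIsolatedScanners(false)
          .setOfflineScan(false);

      Map<String,InputTableConfig> configs = new HashMap<String,InputTableConfig>();
      configs.put("exampleTable", cfg);                                // placeholder table name
      Job job = Job.getInstance();
      AccumuloMultiTableInputFormat.setInputTableConfigs(job, configs); // assumed 1.6 API
    }
  }
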
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
deleted file mode 100644
index 4b5a149..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
+++ /dev/null
@@ -1,490 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.math.BigInteger;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.Instance;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.ZooKeeperInstance;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.TokenSource;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
-import org.apache.accumulo.core.data.ByteSequence;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.PartialKey;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.log4j.Level;
-
-/**
- * The Class RangeInputSplit. Encapsulates an Accumulo range for use in Map Reduce jobs.
- */
-public class RangeInputSplit extends InputSplit implements Writable {
- private Range range;
- private String[] locations;
- private String tableId, tableName, instanceName, zooKeepers, principal;
- private TokenSource tokenSource;
- private String tokenFile;
- private AuthenticationToken token;
- private Boolean offline, mockInstance, isolatedScan, localIterators;
- private Authorizations auths;
- private Set<Pair<Text,Text>> fetchedColumns;
- private List<IteratorSetting> iterators;
- private Level level;
-
- public RangeInputSplit() {
- range = new Range();
- locations = new String[0];
- tableName = "";
- tableId = "";
- }
-
- public RangeInputSplit(RangeInputSplit split) throws IOException {
- this.setRange(split.getRange());
- this.setLocations(split.getLocations());
- this.setTableName(split.getTableName());
- this.setTableId(split.getTableId());
- }
-
- protected RangeInputSplit(String table, String tableId, Range range, String[] locations) {
- this.range = range;
- setLocations(locations);
- this.tableName = table;
- this.tableId = tableId;
- }
-
- public Range getRange() {
- return range;
- }
-
- private static byte[] extractBytes(ByteSequence seq, int numBytes) {
- byte[] bytes = new byte[numBytes + 1];
- bytes[0] = 0;
- for (int i = 0; i < numBytes; i++) {
- if (i >= seq.length())
- bytes[i + 1] = 0;
- else
- bytes[i + 1] = seq.byteAt(i);
- }
- return bytes;
- }
-
- public static float getProgress(ByteSequence start, ByteSequence end, ByteSequence position) {
- int maxDepth = Math.min(Math.max(end.length(), start.length()), position.length());
- BigInteger startBI = new BigInteger(extractBytes(start, maxDepth));
- BigInteger endBI = new BigInteger(extractBytes(end, maxDepth));
- BigInteger positionBI = new BigInteger(extractBytes(position, maxDepth));
- return (float) (positionBI.subtract(startBI).doubleValue() / endBI.subtract(startBI).doubleValue());
- }
-
- public float getProgress(Key currentKey) {
- if (currentKey == null)
- return 0f;
- if (range.getStartKey() != null && range.getEndKey() != null) {
- if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW) != 0) {
- // just look at the row progress
- return getProgress(range.getStartKey().getRowData(), range.getEndKey().getRowData(), currentKey.getRowData());
- } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM) != 0) {
- // just look at the column family progress
- return getProgress(range.getStartKey().getColumnFamilyData(), range.getEndKey().getColumnFamilyData(), currentKey.getColumnFamilyData());
- } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM_COLQUAL) != 0) {
- // just look at the column qualifier progress
- return getProgress(range.getStartKey().getColumnQualifierData(), range.getEndKey().getColumnQualifierData(), currentKey.getColumnQualifierData());
- }
- }
- // if we can't figure it out, then claim no progress
- return 0f;
- }
-
- /**
- * This implementation of length is only an estimate; it does not provide exact values. Do not rely on this return value in your code.
- */
- @Override
- public long getLength() throws IOException {
- Text startRow = range.isInfiniteStartKey() ? new Text(new byte[] {Byte.MIN_VALUE}) : range.getStartKey().getRow();
- Text stopRow = range.isInfiniteStopKey() ? new Text(new byte[] {Byte.MAX_VALUE}) : range.getEndKey().getRow();
- int maxCommon = Math.min(7, Math.min(startRow.getLength(), stopRow.getLength()));
- long diff = 0;
-
- byte[] start = startRow.getBytes();
- byte[] stop = stopRow.getBytes();
- for (int i = 0; i < maxCommon; ++i) {
- diff |= 0xff & (start[i] ^ stop[i]);
- diff <<= Byte.SIZE;
- }
-
- if (startRow.getLength() != stopRow.getLength())
- diff |= 0xff;
-
- return diff + 1;
- }
-
- @Override
- public String[] getLocations() throws IOException {
- return Arrays.copyOf(locations, locations.length);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- range.readFields(in);
- tableName = in.readUTF();
- tableId = in.readUTF();
- int numLocs = in.readInt();
- locations = new String[numLocs];
- for (int i = 0; i < numLocs; ++i)
- locations[i] = in.readUTF();
-
- if (in.readBoolean()) {
- isolatedScan = in.readBoolean();
- }
-
- if (in.readBoolean()) {
- offline = in.readBoolean();
- }
-
- if (in.readBoolean()) {
- localIterators = in.readBoolean();
- }
-
- if (in.readBoolean()) {
- mockInstance = in.readBoolean();
- }
-
- if (in.readBoolean()) {
- int numColumns = in.readInt();
- List<String> columns = new ArrayList<String>(numColumns);
- for (int i = 0; i < numColumns; i++) {
- columns.add(in.readUTF());
- }
-
- fetchedColumns = InputConfigurator.deserializeFetchedColumns(columns);
- }
-
- if (in.readBoolean()) {
- String strAuths = in.readUTF();
- auths = new Authorizations(strAuths.getBytes(StandardCharsets.UTF_8));
- }
-
- if (in.readBoolean()) {
- principal = in.readUTF();
- }
-
- if (in.readBoolean()) {
- int ordinal = in.readInt();
- this.tokenSource = TokenSource.values()[ordinal];
-
- switch (this.tokenSource) {
- case INLINE:
- String tokenClass = in.readUTF();
- byte[] base64TokenBytes = in.readUTF().getBytes(StandardCharsets.UTF_8);
- byte[] tokenBytes = Base64.decodeBase64(base64TokenBytes);
-
- this.token = AuthenticationTokenSerializer.deserialize(tokenClass, tokenBytes);
- break;
-
- case FILE:
- this.tokenFile = in.readUTF();
-
- break;
- default:
- throw new IOException("Cannot parse unknown TokenSource ordinal");
- }
- }
-
- if (in.readBoolean()) {
- instanceName = in.readUTF();
- }
-
- if (in.readBoolean()) {
- zooKeepers = in.readUTF();
- }
-
- if (in.readBoolean()) {
- level = Level.toLevel(in.readInt());
- }
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- range.write(out);
- out.writeUTF(tableName);
- out.writeUTF(tableId);
- out.writeInt(locations.length);
- for (int i = 0; i < locations.length; ++i)
- out.writeUTF(locations[i]);
-
- out.writeBoolean(null != isolatedScan);
- if (null != isolatedScan) {
- out.writeBoolean(isolatedScan);
- }
-
- out.writeBoolean(null != offline);
- if (null != offline) {
- out.writeBoolean(offline);
- }
-
- out.writeBoolean(null != localIterators);
- if (null != localIterators) {
- out.writeBoolean(localIterators);
- }
-
- out.writeBoolean(null != mockInstance);
- if (null != mockInstance) {
- out.writeBoolean(mockInstance);
- }
-
- out.writeBoolean(null != fetchedColumns);
- if (null != fetchedColumns) {
- String[] cols = InputConfigurator.serializeColumns(fetchedColumns);
- out.writeInt(cols.length);
- for (String col : cols) {
- out.writeUTF(col);
- }
- }
-
- out.writeBoolean(null != auths);
- if (null != auths) {
- out.writeUTF(auths.serialize());
- }
-
- out.writeBoolean(null != principal);
- if (null != principal) {
- out.writeUTF(principal);
- }
-
- out.writeBoolean(null != tokenSource);
- if (null != tokenSource) {
- out.writeInt(tokenSource.ordinal());
-
- if (null != token && null != tokenFile) {
- throw new IOException("Cannot use both inline AuthenticationToken and file-based AuthenticationToken");
- } else if (null != token) {
- out.writeUTF(token.getClass().getCanonicalName());
- out.writeUTF(Base64.encodeBase64String(AuthenticationTokenSerializer.serialize(token)));
- } else {
- out.writeUTF(tokenFile);
- }
- }
-
- out.writeBoolean(null != instanceName);
- if (null != instanceName) {
- out.writeUTF(instanceName);
- }
-
- out.writeBoolean(null != zooKeepers);
- if (null != zooKeepers) {
- out.writeUTF(zooKeepers);
- }
-
- out.writeBoolean(null != level);
- if (null != level) {
- out.writeInt(level.toInt());
- }
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder(256);
- sb.append("Range: ").append(range);
- sb.append(" Locations: ").append(Arrays.asList(locations));
- sb.append(" Table: ").append(tableName);
- sb.append(" TableID: ").append(tableId);
- sb.append(" InstanceName: ").append(instanceName);
- sb.append(" zooKeepers: ").append(zooKeepers);
- sb.append(" principal: ").append(principal);
- sb.append(" tokenSource: ").append(tokenSource);
- sb.append(" authenticationToken: ").append(token);
- sb.append(" authenticationTokenFile: ").append(tokenFile);
- sb.append(" Authorizations: ").append(auths);
- sb.append(" offlineScan: ").append(offline);
- sb.append(" mockInstance: ").append(mockInstance);
- sb.append(" isolatedScan: ").append(isolatedScan);
- sb.append(" localIterators: ").append(localIterators);
- sb.append(" fetchColumns: ").append(fetchedColumns);
- sb.append(" iterators: ").append(iterators);
- sb.append(" logLevel: ").append(level);
- return sb.toString();
- }
-
- public String getTableName() {
- return tableName;
- }
-
- public void setTableName(String table) {
- this.tableName = table;
- }
-
- public void setTableId(String tableId) {
- this.tableId = tableId;
- }
-
- public String getTableId() {
- return tableId;
- }
-
- public Instance getInstance() {
- if (null == instanceName) {
- return null;
- }
-
- if (isMockInstance()) {
- return new MockInstance(getInstanceName());
- }
-
- if (null == zooKeepers) {
- return null;
- }
-
- return new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(getInstanceName()).withZkHosts(getZooKeepers()));
- }
-
- public String getInstanceName() {
- return instanceName;
- }
-
- public void setInstanceName(String instanceName) {
- this.instanceName = instanceName;
- }
-
- public String getZooKeepers() {
- return zooKeepers;
- }
-
- public void setZooKeepers(String zooKeepers) {
- this.zooKeepers = zooKeepers;
- }
-
- public String getPrincipal() {
- return principal;
- }
-
- public void setPrincipal(String principal) {
- this.principal = principal;
- }
-
- public AuthenticationToken getToken() {
- return token;
- }
-
- public void setToken(AuthenticationToken token) {
- this.tokenSource = TokenSource.INLINE;
- this.token = token;
- }
-
- public void setToken(String tokenFile) {
- this.tokenSource = TokenSource.FILE;
- this.tokenFile = tokenFile;
- }
-
- public Boolean isOffline() {
- return offline;
- }
-
- public void setOffline(Boolean offline) {
- this.offline = offline;
- }
-
- public void setLocations(String[] locations) {
- this.locations = Arrays.copyOf(locations, locations.length);
- }
-
- public Boolean isMockInstance() {
- return mockInstance;
- }
-
- public void setMockInstance(Boolean mockInstance) {
- this.mockInstance = mockInstance;
- }
-
- public Boolean isIsolatedScan() {
- return isolatedScan;
- }
-
- public void setIsolatedScan(Boolean isolatedScan) {
- this.isolatedScan = isolatedScan;
- }
-
- public Authorizations getAuths() {
- return auths;
- }
-
- public void setAuths(Authorizations auths) {
- this.auths = auths;
- }
-
- public void setRange(Range range) {
- this.range = range;
- }
-
- public Boolean usesLocalIterators() {
- return localIterators;
- }
-
- public void setUsesLocalIterators(Boolean localIterators) {
- this.localIterators = localIterators;
- }
-
- public Set<Pair<Text,Text>> getFetchedColumns() {
- return fetchedColumns;
- }
-
- public void setFetchedColumns(Collection<Pair<Text,Text>> fetchedColumns) {
- this.fetchedColumns = new HashSet<Pair<Text,Text>>();
- for (Pair<Text,Text> columns : fetchedColumns) {
- this.fetchedColumns.add(columns);
- }
- }
-
- public void setFetchedColumns(Set<Pair<Text,Text>> fetchedColumns) {
- this.fetchedColumns = fetchedColumns;
- }
-
- public List<IteratorSetting> getIterators() {
- return iterators;
- }
-
- public void setIterators(List<IteratorSetting> iterators) {
- this.iterators = iterators;
- }
-
- public Level getLogLevel() {
- return level;
- }
-
- public void setLogLevel(Level level) {
- this.level = level;
- }
-}
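
RangeInputSplit carries the split's range plus optional connection and scan settings as flag-prefixed Writable fields, as write() and readFields() above show. A small round-trip sketch, not part of this commit and using placeholder values throughout:

  import java.io.ByteArrayInputStream;
  import java.io.ByteArrayOutputStream;
  import java.io.DataInputStream;
  import java.io.DataOutputStream;

  import org.apache.accumulo.core.client.mapreduce.RangeInputSplit;
  import org.apache.accumulo.core.data.Range;
  import org.apache.accumulo.core.security.Authorizations;

  public class RangeInputSplitSketch {
    public static void main(String[] args) throws Exception {
      RangeInputSplit split = new RangeInputSplit();
      split.setTableName("exampleTable");                  // placeholder table
      split.setTableId("1");                               // placeholder table id
      split.setRange(new Range("a", "m"));
      split.setLocations(new String[] {"tserver1:9997"});  // locations are copied defensively
      split.setAuths(new Authorizations("public"));

      ByteArrayOutputStream bytes = new ByteArrayOutputStream();
      split.write(new DataOutputStream(bytes));            // optional fields are preceded by boolean flags

      RangeInputSplit copy = new RangeInputSplit();
      copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
      System.out.println(copy);                            // toString() prints the restored fields
    }
  }
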
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBase.java
deleted file mode 100644
index 4610556..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBase.java
+++ /dev/null
@@ -1,369 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce.lib.impl;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.nio.charset.StandardCharsets;
-
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.Instance;
-import org.apache.accumulo.core.client.ZooKeeperInstance;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
-import org.apache.accumulo.core.security.Credentials;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-
-/**
- * @since 1.6.0
- */
-public class ConfiguratorBase {
-
- /**
- * Configuration keys for {@link Instance#getConnector(String, AuthenticationToken)}.
- *
- * @since 1.6.0
- */
- public static enum ConnectorInfo {
- IS_CONFIGURED, PRINCIPAL, TOKEN,
- }
-
- public static enum TokenSource {
- FILE, INLINE;
-
- private String prefix;
-
- private TokenSource() {
- prefix = name().toLowerCase() + ":";
- }
-
- public String prefix() {
- return prefix;
- }
- }
-
- /**
- * Configuration keys for {@link Instance}, {@link ZooKeeperInstance}, and {@link MockInstance}.
- *
- * @since 1.6.0
- */
- public static enum InstanceOpts {
- TYPE, NAME, ZOO_KEEPERS, CLIENT_CONFIG;
- }
-
- /**
- * Configuration keys for general configuration options.
- *
- * @since 1.6.0
- */
- public static enum GeneralOpts {
- LOG_LEVEL
- }
-
- /**
- * Provides a configuration key for a given feature enum, prefixed by the implementingClass
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param e
- * the enum used to provide the unique part of the configuration key
- * @return the configuration key
- * @since 1.6.0
- */
- protected static String enumToConfKey(Class<?> implementingClass, Enum<?> e) {
- return implementingClass.getSimpleName() + "." + e.getDeclaringClass().getSimpleName() + "." + StringUtils.camelize(e.name().toLowerCase());
- }
-
- /**
- * Sets the connector information needed to communicate with Accumulo in this job.
- *
- * <p>
- * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe
- * conversion to a string, and is not intended to be secure.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param principal
- * a valid Accumulo user name
- * @param token
- * the user's password
- * @since 1.6.0
- */
- public static void setConnectorInfo(Class<?> implementingClass, Configuration conf, String principal, AuthenticationToken token)
- throws AccumuloSecurityException {
- if (isConnectorInfoSet(implementingClass, conf))
- throw new IllegalStateException("Connector info for " + implementingClass.getSimpleName() + " can only be set once per job");
-
- checkArgument(principal != null, "principal is null");
- checkArgument(token != null, "token is null");
- conf.setBoolean(enumToConfKey(implementingClass, ConnectorInfo.IS_CONFIGURED), true);
- conf.set(enumToConfKey(implementingClass, ConnectorInfo.PRINCIPAL), principal);
- conf.set(enumToConfKey(implementingClass, ConnectorInfo.TOKEN),
- TokenSource.INLINE.prefix() + token.getClass().getName() + ":" + Base64.encodeBase64String(AuthenticationTokenSerializer.serialize(token)));
- }
-
- /**
- * Sets the connector information needed to communicate with Accumulo in this job.
- *
- * <p>
- * Pulls a token file into the Distributed Cache that contains the authentication token in an attempt to be more secure than storing the password in the
- * Configuration. Token file created with "bin/accumulo create-token".
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param principal
- * a valid Accumulo user name
- * @param tokenFile
- * the path to the token file in DFS
- * @since 1.6.0
- */
- public static void setConnectorInfo(Class<?> implementingClass, Configuration conf, String principal, String tokenFile) throws AccumuloSecurityException {
- if (isConnectorInfoSet(implementingClass, conf))
- throw new IllegalStateException("Connector info for " + implementingClass.getSimpleName() + " can only be set once per job");
-
- checkArgument(principal != null, "principal is null");
- checkArgument(tokenFile != null, "tokenFile is null");
-
- try {
- DistributedCacheHelper.addCacheFile(new URI(tokenFile), conf);
- } catch (URISyntaxException e) {
- throw new IllegalStateException("Unable to add tokenFile \"" + tokenFile + "\" to distributed cache.");
- }
-
- conf.setBoolean(enumToConfKey(implementingClass, ConnectorInfo.IS_CONFIGURED), true);
- conf.set(enumToConfKey(implementingClass, ConnectorInfo.PRINCIPAL), principal);
- conf.set(enumToConfKey(implementingClass, ConnectorInfo.TOKEN), TokenSource.FILE.prefix() + tokenFile);
- }
-
- /**
- * Determines if the connector info has already been set for this instance.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return true if the connector info has already been set, false otherwise
- * @since 1.6.0
- * @see #setConnectorInfo(Class, Configuration, String, AuthenticationToken)
- */
- public static Boolean isConnectorInfoSet(Class<?> implementingClass, Configuration conf) {
- return conf.getBoolean(enumToConfKey(implementingClass, ConnectorInfo.IS_CONFIGURED), false);
- }
-
- /**
- * Gets the user name from the configuration.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return the principal
- * @since 1.6.0
- * @see #setConnectorInfo(Class, Configuration, String, AuthenticationToken)
- */
- public static String getPrincipal(Class<?> implementingClass, Configuration conf) {
- return conf.get(enumToConfKey(implementingClass, ConnectorInfo.PRINCIPAL));
- }
-
- /**
- * Gets the authenticated token from either the specified token file or directly from the configuration, whichever was used when the job was configured.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return the principal's authentication token
- * @since 1.6.0
- * @see #setConnectorInfo(Class, Configuration, String, AuthenticationToken)
- * @see #setConnectorInfo(Class, Configuration, String, String)
- */
- public static AuthenticationToken getAuthenticationToken(Class<?> implementingClass, Configuration conf) {
- String token = conf.get(enumToConfKey(implementingClass, ConnectorInfo.TOKEN));
- if (token == null || token.isEmpty())
- return null;
- if (token.startsWith(TokenSource.INLINE.prefix())) {
- String[] args = token.substring(TokenSource.INLINE.prefix().length()).split(":", 2);
- if (args.length == 2)
- return AuthenticationTokenSerializer.deserialize(args[0], Base64.decodeBase64(args[1].getBytes(StandardCharsets.UTF_8)));
- } else if (token.startsWith(TokenSource.FILE.prefix())) {
- String tokenFileName = token.substring(TokenSource.FILE.prefix().length());
- return getTokenFromFile(conf, getPrincipal(implementingClass, conf), tokenFileName);
- }
-
- throw new IllegalStateException("Token was not properly serialized into the configuration");
- }
-
- /**
- * Reads from the token file in distributed cache. Currently, the token file stores data separated by colons e.g. principal:token_class:token
- *
- * @param conf
- * the Hadoop context for the configured job
- * @return the principal's authentication token read from the token file
- * @since 1.6.0
- * @see #setConnectorInfo(Class, Configuration, String, AuthenticationToken)
- */
- public static AuthenticationToken getTokenFromFile(Configuration conf, String principal, String tokenFile) {
- FSDataInputStream in = null;
- try {
- URI[] uris = DistributedCacheHelper.getCacheFiles(conf);
- Path path = null;
- for (URI u : uris) {
- if (u.toString().equals(tokenFile)) {
- path = new Path(u);
- }
- }
- if (path == null) {
- throw new IllegalArgumentException("Couldn't find password file called \"" + tokenFile + "\" in cache.");
- }
- FileSystem fs = FileSystem.get(conf);
- in = fs.open(path);
- } catch (IOException e) {
- throw new IllegalArgumentException("Couldn't open password file called \"" + tokenFile + "\".");
- }
- try (java.util.Scanner fileScanner = new java.util.Scanner(in)) {
- while (fileScanner.hasNextLine()) {
- Credentials creds = Credentials.deserialize(fileScanner.nextLine());
- if (principal.equals(creds.getPrincipal())) {
- return creds.getToken();
- }
- }
- throw new IllegalArgumentException("Couldn't find token for user \"" + principal + "\" in file \"" + tokenFile + "\"");
- }
- }
-
- /**
- * Configures a {@link ZooKeeperInstance} for this job.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param clientConfig
- * client configuration for specifying connection timeouts, SSL connection options, etc.
- * @since 1.6.0
- */
- public static void setZooKeeperInstance(Class<?> implementingClass, Configuration conf, ClientConfiguration clientConfig) {
- String key = enumToConfKey(implementingClass, InstanceOpts.TYPE);
- if (!conf.get(key, "").isEmpty())
- throw new IllegalStateException("Instance info can only be set once per job; it has already been configured with " + conf.get(key));
- conf.set(key, "ZooKeeperInstance");
- if (clientConfig != null) {
- conf.set(enumToConfKey(implementingClass, InstanceOpts.CLIENT_CONFIG), clientConfig.serialize());
- }
- }
-
- /**
- * Configures a {@link MockInstance} for this job.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param instanceName
- * the Accumulo instance name
- * @since 1.6.0
- */
- public static void setMockInstance(Class<?> implementingClass, Configuration conf, String instanceName) {
- String key = enumToConfKey(implementingClass, InstanceOpts.TYPE);
- if (!conf.get(key, "").isEmpty())
- throw new IllegalStateException("Instance info can only be set once per job; it has already been configured with " + conf.get(key));
- conf.set(key, "MockInstance");
-
- checkArgument(instanceName != null, "instanceName is null");
- conf.set(enumToConfKey(implementingClass, InstanceOpts.NAME), instanceName);
- }
-
- /**
- * Initializes an Accumulo {@link Instance} based on the configuration.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return an Accumulo instance
- * @since 1.6.0
- * @see #setZooKeeperInstance(Class, Configuration, ClientConfiguration)
- * @see #setMockInstance(Class, Configuration, String)
- */
- public static Instance getInstance(Class<?> implementingClass, Configuration conf) {
- String instanceType = conf.get(enumToConfKey(implementingClass, InstanceOpts.TYPE), "");
- if ("MockInstance".equals(instanceType))
- return new MockInstance(conf.get(enumToConfKey(implementingClass, InstanceOpts.NAME)));
- else if ("ZooKeeperInstance".equals(instanceType)) {
- String clientConfigString = conf.get(enumToConfKey(implementingClass, InstanceOpts.CLIENT_CONFIG));
- if (clientConfigString == null) {
- String instanceName = conf.get(enumToConfKey(implementingClass, InstanceOpts.NAME));
- String zookeepers = conf.get(enumToConfKey(implementingClass, InstanceOpts.ZOO_KEEPERS));
- return new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(instanceName).withZkHosts(zookeepers));
- } else {
- return new ZooKeeperInstance(ClientConfiguration.deserialize(clientConfigString));
- }
- } else if (instanceType.isEmpty())
- throw new IllegalStateException("Instance has not been configured for " + implementingClass.getSimpleName());
- else
- throw new IllegalStateException("Unrecognized instance type " + instanceType);
- }
-
- /**
- * Sets the log level for this job.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param level
- * the logging level
- * @since 1.6.0
- */
- public static void setLogLevel(Class<?> implementingClass, Configuration conf, Level level) {
- checkArgument(level != null, "level is null");
- Logger.getLogger(implementingClass).setLevel(level);
- conf.setInt(enumToConfKey(implementingClass, GeneralOpts.LOG_LEVEL), level.toInt());
- }
-
- /**
- * Gets the log level from this configuration.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @return the log level
- * @since 1.6.0
- * @see #setLogLevel(Class, Configuration, Level)
- */
- public static Level getLogLevel(Class<?> implementingClass, Configuration conf) {
- return Level.toLevel(conf.getInt(enumToConfKey(implementingClass, GeneralOpts.LOG_LEVEL), Level.INFO.toInt()));
- }
-
-}
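For orientation, here is a minimal, hypothetical sketch (not part of this commit; the class name and credentials are placeholders) of how the ConfiguratorBase helpers above are typically driven:

import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.hadoop.conf.Configuration;

public class ConnectorInfoSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Keys are prefixed with the implementing class name, so several formats
    // configured in the same job do not collide, e.g.
    // "ConnectorInfoSketch.ConnectorInfo.Principal".
    ConfiguratorBase.setConnectorInfo(ConnectorInfoSketch.class, conf, "root", new PasswordToken("secret"));
    System.out.println(ConfiguratorBase.isConnectorInfoSet(ConnectorInfoSketch.class, conf)); // true
    System.out.println(ConfiguratorBase.getPrincipal(ConnectorInfoSketch.class, conf));       // root
    // The token is stored BASE64 encoded with an "inline:" prefix and can be
    // read back via ConfiguratorBase.getAuthenticationToken(ConnectorInfoSketch.class, conf).
  }
}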
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/DistributedCacheHelper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/DistributedCacheHelper.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/DistributedCacheHelper.java
deleted file mode 100644
index c694b9a..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/DistributedCacheHelper.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce.lib.impl;
-
-import java.io.IOException;
-import java.net.URI;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.filecache.DistributedCache;
-import org.apache.hadoop.fs.Path;
-
-/**
- * @since 1.6.0
- */
-@SuppressWarnings("deprecation")
-public class DistributedCacheHelper {
-
- /**
- * @since 1.6.0
- */
- public static void addCacheFile(URI uri, Configuration conf) {
- DistributedCache.addCacheFile(uri, conf);
- }
-
- /**
- * @since 1.6.0
- */
- public static URI[] getCacheFiles(Configuration conf) throws IOException {
- return DistributedCache.getCacheFiles(conf);
- }
-
- /**
- * @since 1.6.0
- */
- public static Path[] getLocalCacheFiles(Configuration conf) throws IOException {
- return DistributedCache.getLocalCacheFiles(conf);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
deleted file mode 100644
index ce84209..0000000
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce.lib.impl;
-
-import java.util.Arrays;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
-import org.apache.accumulo.core.conf.ConfigurationCopy;
-import org.apache.accumulo.core.conf.Property;
-import org.apache.hadoop.conf.Configuration;
-
-/**
- * @since 1.6.0
- */
-public class FileOutputConfigurator extends ConfiguratorBase {
-
- /**
- * Configuration keys for {@link AccumuloConfiguration}.
- *
- * @since 1.6.0
- */
- public static enum Opts {
- ACCUMULO_PROPERTIES;
- }
-
- /**
- * The supported Accumulo properties we set in this OutputFormat, that change the behavior of the RecordWriter.<br />
- * These properties correspond to the supported public static setter methods available to this class.
- *
- * @param property
- * the Accumulo property to check
- * @since 1.6.0
- */
- protected static Boolean isSupportedAccumuloProperty(Property property) {
- switch (property) {
- case TABLE_FILE_COMPRESSION_TYPE:
- case TABLE_FILE_COMPRESSED_BLOCK_SIZE:
- case TABLE_FILE_BLOCK_SIZE:
- case TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX:
- case TABLE_FILE_REPLICATION:
- return true;
- default:
- return false;
- }
- }
-
- /**
- * Helper for transforming Accumulo configuration properties into something that can be stored safely inside the Hadoop Job configuration.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param property
- * the supported Accumulo property
- * @param value
- * the value of the property to set
- * @since 1.6.0
- */
- private static <T> void setAccumuloProperty(Class<?> implementingClass, Configuration conf, Property property, T value) {
- if (isSupportedAccumuloProperty(property)) {
- String val = String.valueOf(value);
- if (property.getType().isValidFormat(val))
- conf.set(enumToConfKey(implementingClass, Opts.ACCUMULO_PROPERTIES) + "." + property.getKey(), val);
- else
- throw new IllegalArgumentException("Value is not appropriate for property type '" + property.getType() + "'");
- } else
- throw new IllegalArgumentException("Unsupported configuration property " + property.getKey());
- }
-
- /**
- * This helper method provides an AccumuloConfiguration object constructed from the Accumulo defaults, and overridden with Accumulo properties that have been
- * stored in the Job's configuration.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @since 1.6.0
- */
- public static AccumuloConfiguration getAccumuloConfiguration(Class<?> implementingClass, Configuration conf) {
- String prefix = enumToConfKey(implementingClass, Opts.ACCUMULO_PROPERTIES) + ".";
- ConfigurationCopy acuConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
- for (Entry<String,String> entry : conf)
- if (entry.getKey().startsWith(prefix))
- acuConf.set(Property.getPropertyByKey(entry.getKey().substring(prefix.length())), entry.getValue());
- return acuConf;
- }
-
- /**
- * Sets the compression type to use for data blocks. Specifying a compression may require additional libraries to be available to your Job.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param compressionType
- * one of "none", "gz", "lzo", or "snappy"
- * @since 1.6.0
- */
- public static void setCompressionType(Class<?> implementingClass, Configuration conf, String compressionType) {
- if (compressionType == null || !Arrays.asList("none", "gz", "lzo", "snappy").contains(compressionType))
- throw new IllegalArgumentException("Compression type must be one of: none, gz, lzo, snappy");
- setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_COMPRESSION_TYPE, compressionType);
- }
-
- /**
- * Sets the size for data blocks within each file.<br />
- * Data blocks are a span of key/value pairs stored in the file that are compressed and indexed as a group.
- *
- * <p>
- * Making this value smaller may increase seek performance, but at the cost of increasing the size of the indexes (which can also affect seek performance).
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param dataBlockSize
- * the block size, in bytes
- * @since 1.6.0
- */
- public static void setDataBlockSize(Class<?> implementingClass, Configuration conf, long dataBlockSize) {
- setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE, dataBlockSize);
- }
-
- /**
- * Sets the size for file blocks in the file system; file blocks are managed, and replicated, by the underlying file system.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param fileBlockSize
- * the block size, in bytes
- * @since 1.6.0
- */
- public static void setFileBlockSize(Class<?> implementingClass, Configuration conf, long fileBlockSize) {
- setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_BLOCK_SIZE, fileBlockSize);
- }
-
- /**
- * Sets the size for index blocks within each file; smaller blocks means a deeper index hierarchy within the file, while larger blocks mean a more shallow
- * index hierarchy within the file. This can affect the performance of queries.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param indexBlockSize
- * the block size, in bytes
- * @since 1.6.0
- */
- public static void setIndexBlockSize(Class<?> implementingClass, Configuration conf, long indexBlockSize) {
- setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX, indexBlockSize);
- }
-
- /**
- * Sets the file system replication factor for the resulting file, overriding the file system default.
- *
- * @param implementingClass
- * the class whose name will be used as a prefix for the property configuration key
- * @param conf
- * the Hadoop configuration object to configure
- * @param replication
- * the number of replicas for produced files
- * @since 1.6.0
- */
- public static void setReplication(Class<?> implementingClass, Configuration conf, int replication) {
- setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_REPLICATION, replication);
- }
-
-}
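Similarly, a short hypothetical sketch (not part of this commit; the demo class name is a placeholder) of how the FileOutputConfigurator setters above push supported Accumulo properties through a Hadoop Configuration and back:

import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.conf.Property;
import org.apache.hadoop.conf.Configuration;

public class FileOutputConfiguratorSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Each setter validates its value and stores it under a key of the form
    // "<ClassName>.Opts.AccumuloProperties.<accumulo property key>".
    FileOutputConfigurator.setCompressionType(FileOutputConfiguratorSketch.class, conf, "gz");
    FileOutputConfigurator.setReplication(FileOutputConfiguratorSketch.class, conf, 3);
    // getAccumuloConfiguration layers the stored overrides on top of the defaults.
    AccumuloConfiguration acuConf = FileOutputConfigurator.getAccumuloConfiguration(FileOutputConfiguratorSketch.class, conf);
    System.out.println(acuConf.get(Property.TABLE_FILE_COMPRESSION_TYPE)); // gz
  }
}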
[06/12] ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormat.java
new file mode 100644
index 0000000..122b4cd
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormat.java
@@ -0,0 +1,539 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.MultiTableBatchWriter;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.OutputConfigurator;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.SecurityErrorCode;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
+import org.apache.accumulo.core.data.ColumnUpdate;
+import org.apache.accumulo.core.data.KeyExtent;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Progressable;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+/**
+ * This class allows MapReduce jobs to use Accumulo as the sink for data. This {@link OutputFormat} accepts keys and values of type {@link Text} (for a table
+ * name) and {@link Mutation} from the Map and Reduce functions.
+ *
+ * The user must specify the following via static configurator methods:
+ *
+ * <ul>
+ * <li>{@link AccumuloOutputFormat#setConnectorInfo(JobConf, String, AuthenticationToken)}
+ * <li>{@link AccumuloOutputFormat#setConnectorInfo(JobConf, String, String)}
+ * <li>{@link AccumuloOutputFormat#setZooKeeperInstance(JobConf, ClientConfiguration)} OR {@link AccumuloOutputFormat#setMockInstance(JobConf, String)}
+ * </ul>
+ *
+ * Other static methods are optional.
+ */
+public class AccumuloOutputFormat implements OutputFormat<Text,Mutation> {
+
+ private static final Class<?> CLASS = AccumuloOutputFormat.class;
+ protected static final Logger log = Logger.getLogger(CLASS);
+
+ /**
+ * Sets the connector information needed to communicate with Accumulo in this job.
+ *
+ * <p>
+ * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe
+ * conversion to a string, and is not intended to be secure.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param principal
+ * a valid Accumulo user name (user must have Table.CREATE permission if {@link #setCreateTables(JobConf, boolean)} is set to true)
+ * @param token
+ * the user's password
+ * @since 1.5.0
+ */
+ public static void setConnectorInfo(JobConf job, String principal, AuthenticationToken token) throws AccumuloSecurityException {
+ OutputConfigurator.setConnectorInfo(CLASS, job, principal, token);
+ }
+
+ /**
+ * Sets the connector information needed to communicate with Accumulo in this job.
+ *
+ * <p>
+ * Stores the password in a file in HDFS and pulls that into the Distributed Cache in an attempt to be more secure than storing it in the Configuration.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param principal
+ * a valid Accumulo user name (user must have Table.CREATE permission if {@link #setCreateTables(JobConf, boolean)} is set to true)
+ * @param tokenFile
+ * the path to the password file
+ * @since 1.6.0
+ */
+ public static void setConnectorInfo(JobConf job, String principal, String tokenFile) throws AccumuloSecurityException {
+ OutputConfigurator.setConnectorInfo(CLASS, job, principal, tokenFile);
+ }
+
+ /**
+ * Determines if the connector has been configured.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return true if the connector has been configured, false otherwise
+ * @since 1.5.0
+ * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
+ */
+ protected static Boolean isConnectorInfoSet(JobConf job) {
+ return OutputConfigurator.isConnectorInfoSet(CLASS, job);
+ }
+
+ /**
+ * Gets the principal from the configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return the user name
+ * @since 1.5.0
+ * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
+ */
+ protected static String getPrincipal(JobConf job) {
+ return OutputConfigurator.getPrincipal(CLASS, job);
+ }
+
+ /**
+ * Gets the serialized token class from either the configuration or the token file.
+ *
+ * @since 1.5.0
+ * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobConf)} instead.
+ */
+ @Deprecated
+ protected static String getTokenClass(JobConf job) {
+ return getAuthenticationToken(job).getClass().getName();
+ }
+
+ /**
+ * Gets the serialized token from either the configuration or the token file.
+ *
+ * @since 1.5.0
+ * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobConf)} instead.
+ */
+ @Deprecated
+ protected static byte[] getToken(JobConf job) {
+ return AuthenticationTokenSerializer.serialize(getAuthenticationToken(job));
+ }
+
+ /**
+ * Gets the authenticated token from either the specified token file or directly from the configuration, whichever was used when the job was configured.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @return the principal's authentication token
+ * @since 1.6.0
+ * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
+ * @see #setConnectorInfo(JobConf, String, String)
+ */
+ protected static AuthenticationToken getAuthenticationToken(JobConf job) {
+ return OutputConfigurator.getAuthenticationToken(CLASS, job);
+ }
+
+ /**
+ * Configures a {@link ZooKeeperInstance} for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param clientConfig
+ * client configuration for specifying connection timeouts, SSL connection options, etc.
+ * @since 1.6.0
+ */
+ public static void setZooKeeperInstance(JobConf job, ClientConfiguration clientConfig) {
+ OutputConfigurator.setZooKeeperInstance(CLASS, job, clientConfig);
+ }
+
+ /**
+ * Configures a {@link MockInstance} for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param instanceName
+ * the Accumulo instance name
+ * @since 1.5.0
+ */
+ public static void setMockInstance(JobConf job, String instanceName) {
+ OutputConfigurator.setMockInstance(CLASS, job, instanceName);
+ }
+
+ /**
+ * Initializes an Accumulo {@link Instance} based on the configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return an Accumulo instance
+ * @since 1.5.0
+ * @see #setZooKeeperInstance(JobConf, ClientConfiguration)
+ * @see #setMockInstance(JobConf, String)
+ */
+ protected static Instance getInstance(JobConf job) {
+ return OutputConfigurator.getInstance(CLASS, job);
+ }
+
+ /**
+ * Sets the log level for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param level
+ * the logging level
+ * @since 1.5.0
+ */
+ public static void setLogLevel(JobConf job, Level level) {
+ OutputConfigurator.setLogLevel(CLASS, job, level);
+ }
+
+ /**
+ * Gets the log level from this configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return the log level
+ * @since 1.5.0
+ * @see #setLogLevel(JobConf, Level)
+ */
+ protected static Level getLogLevel(JobConf job) {
+ return OutputConfigurator.getLogLevel(CLASS, job);
+ }
+
+ /**
+   * Sets the default table name to use if one emits a null in place of a table name for a given mutation. Table names can only contain alphanumerics and
+   * underscores.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param tableName
+ * the table to use when the tablename is null in the write call
+ * @since 1.5.0
+ */
+ public static void setDefaultTableName(JobConf job, String tableName) {
+ OutputConfigurator.setDefaultTableName(CLASS, job, tableName);
+ }
+
+ /**
+ * Gets the default table name from the configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return the default table name
+ * @since 1.5.0
+ * @see #setDefaultTableName(JobConf, String)
+ */
+ protected static String getDefaultTableName(JobConf job) {
+ return OutputConfigurator.getDefaultTableName(CLASS, job);
+ }
+
+ /**
+   * Sets the configuration for the job's {@link BatchWriter} instances. If not set, a new {@link BatchWriterConfig}, with sensible built-in defaults, is
+ * used. Setting the configuration multiple times overwrites any previous configuration.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param bwConfig
+ * the configuration for the {@link BatchWriter}
+ * @since 1.5.0
+ */
+ public static void setBatchWriterOptions(JobConf job, BatchWriterConfig bwConfig) {
+ OutputConfigurator.setBatchWriterOptions(CLASS, job, bwConfig);
+ }
+
+ /**
+ * Gets the {@link BatchWriterConfig} settings.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return the configuration object
+ * @since 1.5.0
+ * @see #setBatchWriterOptions(JobConf, BatchWriterConfig)
+ */
+ protected static BatchWriterConfig getBatchWriterOptions(JobConf job) {
+ return OutputConfigurator.getBatchWriterOptions(CLASS, job);
+ }
+
+ /**
+   * Sets the directive to create new tables, as necessary. Table names can only contain alphanumerics and underscores.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.5.0
+ */
+ public static void setCreateTables(JobConf job, boolean enableFeature) {
+ OutputConfigurator.setCreateTables(CLASS, job, enableFeature);
+ }
+
+ /**
+ * Determines whether tables are permitted to be created as needed.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+   * @return true if the feature is enabled, false otherwise
+ * @since 1.5.0
+ * @see #setCreateTables(JobConf, boolean)
+ */
+ protected static Boolean canCreateTables(JobConf job) {
+ return OutputConfigurator.canCreateTables(CLASS, job);
+ }
+
+ /**
+ * Sets the directive to use simulation mode for this job. In simulation mode, no output is produced. This is useful for testing.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.5.0
+ */
+ public static void setSimulationMode(JobConf job, boolean enableFeature) {
+ OutputConfigurator.setSimulationMode(CLASS, job, enableFeature);
+ }
+
+ /**
+ * Determines whether this feature is enabled.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.5.0
+ * @see #setSimulationMode(JobConf, boolean)
+ */
+ protected static Boolean getSimulationMode(JobConf job) {
+ return OutputConfigurator.getSimulationMode(CLASS, job);
+ }
+
+ /**
+ * A base class to be used to create {@link RecordWriter} instances that write to Accumulo.
+ */
+ protected static class AccumuloRecordWriter implements RecordWriter<Text,Mutation> {
+ private MultiTableBatchWriter mtbw = null;
+ private HashMap<Text,BatchWriter> bws = null;
+ private Text defaultTableName = null;
+
+ private boolean simulate = false;
+ private boolean createTables = false;
+
+ private long mutCount = 0;
+ private long valCount = 0;
+
+ private Connector conn;
+
+ protected AccumuloRecordWriter(JobConf job) throws AccumuloException, AccumuloSecurityException, IOException {
+ Level l = getLogLevel(job);
+ if (l != null)
+ log.setLevel(getLogLevel(job));
+ this.simulate = getSimulationMode(job);
+ this.createTables = canCreateTables(job);
+
+ if (simulate)
+ log.info("Simulating output only. No writes to tables will occur");
+
+ this.bws = new HashMap<Text,BatchWriter>();
+
+ String tname = getDefaultTableName(job);
+ this.defaultTableName = (tname == null) ? null : new Text(tname);
+
+ if (!simulate) {
+ this.conn = getInstance(job).getConnector(getPrincipal(job), getAuthenticationToken(job));
+ mtbw = conn.createMultiTableBatchWriter(getBatchWriterOptions(job));
+ }
+ }
+
+ /**
+ * Push a mutation into a table. If table is null, the defaultTable will be used. If canCreateTable is set, the table will be created if it does not exist.
+ * The table name must only contain alphanumerics and underscore.
+ */
+ @Override
+ public void write(Text table, Mutation mutation) throws IOException {
+ if (table == null || table.toString().isEmpty())
+ table = this.defaultTableName;
+
+ if (!simulate && table == null)
+ throw new IOException("No table or default table specified. Try simulation mode next time");
+
+ ++mutCount;
+ valCount += mutation.size();
+ printMutation(table, mutation);
+
+ if (simulate)
+ return;
+
+ if (!bws.containsKey(table))
+ try {
+ addTable(table);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IOException(e);
+ }
+
+ try {
+ bws.get(table).addMutation(mutation);
+ } catch (MutationsRejectedException e) {
+ throw new IOException(e);
+ }
+ }
+
+ public void addTable(Text tableName) throws AccumuloException, AccumuloSecurityException {
+ if (simulate) {
+ log.info("Simulating adding table: " + tableName);
+ return;
+ }
+
+ log.debug("Adding table: " + tableName);
+ BatchWriter bw = null;
+ String table = tableName.toString();
+
+ if (createTables && !conn.tableOperations().exists(table)) {
+ try {
+ conn.tableOperations().create(table);
+ } catch (AccumuloSecurityException e) {
+ log.error("Accumulo security violation creating " + table, e);
+ throw e;
+ } catch (TableExistsException e) {
+ // Shouldn't happen
+ }
+ }
+
+ try {
+ bw = mtbw.getBatchWriter(table);
+ } catch (TableNotFoundException e) {
+ log.error("Accumulo table " + table + " doesn't exist and cannot be created.", e);
+ throw new AccumuloException(e);
+ } catch (AccumuloException e) {
+ throw e;
+ } catch (AccumuloSecurityException e) {
+ throw e;
+ }
+
+ if (bw != null)
+ bws.put(tableName, bw);
+ }
+
+ private int printMutation(Text table, Mutation m) {
+ if (log.isTraceEnabled()) {
+ log.trace(String.format("Table %s row key: %s", table, hexDump(m.getRow())));
+ for (ColumnUpdate cu : m.getUpdates()) {
+ log.trace(String.format("Table %s column: %s:%s", table, hexDump(cu.getColumnFamily()), hexDump(cu.getColumnQualifier())));
+ log.trace(String.format("Table %s security: %s", table, new ColumnVisibility(cu.getColumnVisibility()).toString()));
+ log.trace(String.format("Table %s value: %s", table, hexDump(cu.getValue())));
+ }
+ }
+ return m.getUpdates().size();
+ }
+
+ private String hexDump(byte[] ba) {
+ StringBuilder sb = new StringBuilder();
+ for (byte b : ba) {
+ if ((b > 0x20) && (b < 0x7e))
+ sb.append((char) b);
+ else
+ sb.append(String.format("x%02x", b));
+ }
+ return sb.toString();
+ }
+
+ @Override
+ public void close(Reporter reporter) throws IOException {
+ log.debug("mutations written: " + mutCount + ", values written: " + valCount);
+ if (simulate)
+ return;
+
+ try {
+ mtbw.close();
+ } catch (MutationsRejectedException e) {
+      if (e.getAuthorizationFailuresMap().size() > 0) {
+ HashMap<String,Set<SecurityErrorCode>> tables = new HashMap<String,Set<SecurityErrorCode>>();
+ for (Entry<KeyExtent,Set<SecurityErrorCode>> ke : e.getAuthorizationFailuresMap().entrySet()) {
+ Set<SecurityErrorCode> secCodes = tables.get(ke.getKey().getTableId().toString());
+ if (secCodes == null) {
+ secCodes = new HashSet<SecurityErrorCode>();
+ tables.put(ke.getKey().getTableId().toString(), secCodes);
+ }
+ secCodes.addAll(ke.getValue());
+ }
+
+ log.error("Not authorized to write to tables : " + tables);
+ }
+
+ if (e.getConstraintViolationSummaries().size() > 0) {
+ log.error("Constraint violations : " + e.getConstraintViolationSummaries().size());
+ }
+ }
+ }
+ }
+
+ @Override
+ public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
+ if (!isConnectorInfoSet(job))
+ throw new IOException("Connector info has not been set.");
+ try {
+ // if the instance isn't configured, it will complain here
+ String principal = getPrincipal(job);
+ AuthenticationToken token = getAuthenticationToken(job);
+ Connector c = getInstance(job).getConnector(principal, token);
+ if (!c.securityOperations().authenticateUser(principal, token))
+ throw new IOException("Unable to authenticate user");
+ } catch (AccumuloException e) {
+ throw new IOException(e);
+ } catch (AccumuloSecurityException e) {
+ throw new IOException(e);
+ }
+ }
+
+ @Override
+ public RecordWriter<Text,Mutation> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
+ try {
+ return new AccumuloRecordWriter(job);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+
+}
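To show where these static configurator methods fit, a hypothetical job setup sketch (not part of this commit; instance name, ZooKeeper hosts, table, and credentials are placeholders):

import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapred.AccumuloOutputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Mutation;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class OutputFormatSetupSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    job.setOutputFormat(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Text.class);        // table name; null falls back to the default table
    job.setOutputValueClass(Mutation.class);  // mutation to apply to that table
    AccumuloOutputFormat.setZooKeeperInstance(job,
        ClientConfiguration.loadDefault().withInstance("myinstance").withZkHosts("zoo1:2181"));
    AccumuloOutputFormat.setConnectorInfo(job, "writer", new PasswordToken("secret"));
    AccumuloOutputFormat.setDefaultTableName(job, "output_table");
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setBatchWriterOptions(job, new BatchWriterConfig());
  }
}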
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormat.java
new file mode 100644
index 0000000..673c5b8
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormat.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import java.io.IOException;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.RowIterator;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.PeekingIterator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * This class allows MapReduce jobs to use Accumulo as the source of data. This {@link InputFormat} provides row names as {@link Text} as keys, and a
+ * corresponding {@link PeekingIterator} as a value, which in turn makes the {@link Key}/{@link Value} pairs for that row available to the Map function.
+ *
+ * The user must specify the following via static configurator methods:
+ *
+ * <ul>
+ * <li>{@link AccumuloRowInputFormat#setConnectorInfo(JobConf, String, AuthenticationToken)}
+ * <li>{@link AccumuloRowInputFormat#setInputTableName(JobConf, String)}
+ * <li>{@link AccumuloRowInputFormat#setScanAuthorizations(JobConf, Authorizations)}
+ * <li>{@link AccumuloRowInputFormat#setZooKeeperInstance(JobConf, ClientConfiguration)} OR {@link AccumuloRowInputFormat#setMockInstance(JobConf, String)}
+ * </ul>
+ *
+ * Other static methods are optional.
+ */
+public class AccumuloRowInputFormat extends InputFormatBase<Text,PeekingIterator<Entry<Key,Value>>> {
+ @Override
+ public RecordReader<Text,PeekingIterator<Entry<Key,Value>>> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
+ log.setLevel(getLogLevel(job));
+ RecordReaderBase<Text,PeekingIterator<Entry<Key,Value>>> recordReader = new RecordReaderBase<Text,PeekingIterator<Entry<Key,Value>>>() {
+ RowIterator rowIterator;
+
+ @Override
+ public void initialize(InputSplit inSplit, JobConf job) throws IOException {
+ super.initialize(inSplit, job);
+ rowIterator = new RowIterator(scannerIterator);
+ }
+
+ @Override
+ public boolean next(Text key, PeekingIterator<Entry<Key,Value>> value) throws IOException {
+ if (!rowIterator.hasNext())
+ return false;
+ value.initialize(rowIterator.next());
+ numKeysRead = rowIterator.getKVCount();
+ key.set((currentKey = value.peek().getKey()).getRow());
+ return true;
+ }
+
+ @Override
+ public Text createKey() {
+ return new Text();
+ }
+
+ @Override
+ public PeekingIterator<Entry<Key,Value>> createValue() {
+ return new PeekingIterator<Entry<Key,Value>>();
+ }
+ };
+ recordReader.initialize(split, job);
+ return recordReader;
+ }
+}
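And a hypothetical mapper (not part of this commit; the class name is a placeholder) consuming the Text row key / PeekingIterator value pairs that AccumuloRowInputFormat hands to the Map function, emitting the number of entries in each row:

import java.io.IOException;
import java.util.Map.Entry;

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.util.PeekingIterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class RowSizeMapper extends MapReduceBase implements Mapper<Text,PeekingIterator<Entry<Key,Value>>,Text,LongWritable> {
  @Override
  public void map(Text row, PeekingIterator<Entry<Key,Value>> columns, OutputCollector<Text,LongWritable> output, Reporter reporter)
      throws IOException {
    long count = 0;
    while (columns.hasNext()) {   // iterate the key/value pairs for this row
      columns.next();
      count++;
    }
    output.collect(row, new LongWritable(count));
  }
}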
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
new file mode 100644
index 0000000..0cee355
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
@@ -0,0 +1,383 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.ClientSideIteratorScanner;
+import org.apache.accumulo.core.client.IsolatedScanner;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.impl.TabletLocator;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * This abstract {@link InputFormat} class allows MapReduce jobs to use Accumulo as the source of K,V pairs.
+ * <p>
+ * Subclasses must implement a {@link #getRecordReader(InputSplit, JobConf, Reporter)} to provide a {@link RecordReader} for K,V.
+ * <p>
+ * A static base class, RecordReaderBase, is provided to retrieve Accumulo {@link Key}/{@link Value} pairs, but one must implement its
+ * {@link RecordReaderBase#next(Object, Object)} to transform them to the desired generic types K,V.
+ * <p>
+ * See {@link AccumuloInputFormat} for an example implementation.
+ */
+public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
+
+ /**
+   * Sets the name of the input table over which this job will scan.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param tableName
+   *          the name of the table from which this job will read
+ * @since 1.5.0
+ */
+ public static void setInputTableName(JobConf job, String tableName) {
+ InputConfigurator.setInputTableName(CLASS, job, tableName);
+ }
+
+ /**
+ * Gets the table name from the configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return the table name
+ * @since 1.5.0
+ * @see #setInputTableName(JobConf, String)
+ */
+ protected static String getInputTableName(JobConf job) {
+ return InputConfigurator.getInputTableName(CLASS, job);
+ }
+
+ /**
+ * Sets the input ranges to scan for this job. If not set, the entire table will be scanned.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param ranges
+ * the ranges that will be mapped over
+ * @since 1.5.0
+ */
+ public static void setRanges(JobConf job, Collection<Range> ranges) {
+ InputConfigurator.setRanges(CLASS, job, ranges);
+ }
+
+ /**
+ * Gets the ranges to scan over from a job.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return the ranges
+ * @throws IOException
+ * if the ranges have been encoded improperly
+ * @since 1.5.0
+ * @see #setRanges(JobConf, Collection)
+ */
+ protected static List<Range> getRanges(JobConf job) throws IOException {
+ return InputConfigurator.getRanges(CLASS, job);
+ }
+
+ /**
+ * Restricts the columns that will be mapped over for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param columnFamilyColumnQualifierPairs
+ * a pair of {@link Text} objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is
+   *          selected. An empty set is the default and is equivalent to scanning all columns.
+ * @since 1.5.0
+ */
+ public static void fetchColumns(JobConf job, Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
+ InputConfigurator.fetchColumns(CLASS, job, columnFamilyColumnQualifierPairs);
+ }
+
+ /**
+ * Gets the columns to be mapped over from this job.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return a set of columns
+ * @since 1.5.0
+ * @see #fetchColumns(JobConf, Collection)
+ */
+ protected static Set<Pair<Text,Text>> getFetchedColumns(JobConf job) {
+ return InputConfigurator.getFetchedColumns(CLASS, job);
+ }
+
+ /**
+ * Encode an iterator on the input for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param cfg
+ * the configuration of the iterator
+ * @since 1.5.0
+ */
+ public static void addIterator(JobConf job, IteratorSetting cfg) {
+ InputConfigurator.addIterator(CLASS, job, cfg);
+ }
+
+ /**
+ * Gets a list of the iterator settings (for iterators to apply to a scanner) from this configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return a list of iterators
+ * @since 1.5.0
+ * @see #addIterator(JobConf, IteratorSetting)
+ */
+ protected static List<IteratorSetting> getIterators(JobConf job) {
+ return InputConfigurator.getIterators(CLASS, job);
+ }
+
+ /**
+ * Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
+   * Disabling this feature will cause exactly one Map task to be created for each specified range.
+ *
+ * <p>
+ * By default, this feature is <b>enabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @see #setRanges(JobConf, Collection)
+ * @since 1.5.0
+ */
+ public static void setAutoAdjustRanges(JobConf job, boolean enableFeature) {
+ InputConfigurator.setAutoAdjustRanges(CLASS, job, enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration has auto-adjust ranges enabled.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return false if the feature is disabled, true otherwise
+ * @since 1.5.0
+ * @see #setAutoAdjustRanges(JobConf, boolean)
+ */
+ protected static boolean getAutoAdjustRanges(JobConf job) {
+ return InputConfigurator.getAutoAdjustRanges(CLASS, job);
+ }
+
+ /**
+ * Controls the use of the {@link IsolatedScanner} in this job.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.5.0
+ */
+ public static void setScanIsolation(JobConf job, boolean enableFeature) {
+ InputConfigurator.setScanIsolation(CLASS, job, enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration has isolation enabled.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.5.0
+ * @see #setScanIsolation(JobConf, boolean)
+ */
+ protected static boolean isIsolated(JobConf job) {
+ return InputConfigurator.isIsolated(CLASS, job);
+ }
+
+ /**
+ * Controls the use of the {@link ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack to be constructed within the Map
+ * task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be available on the classpath for the task.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.5.0
+ */
+ public static void setLocalIterators(JobConf job, boolean enableFeature) {
+ InputConfigurator.setLocalIterators(CLASS, job, enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration uses local iterators.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.5.0
+ * @see #setLocalIterators(JobConf, boolean)
+ */
+ protected static boolean usesLocalIterators(JobConf job) {
+ return InputConfigurator.usesLocalIterators(CLASS, job);
+ }
+
+ /**
+ * <p>
+ * Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
+ * table's files. If the table is not offline, then the job will fail. If the table comes online during the map reduce job, it is likely that the job will
+ * fail.
+ *
+ * <p>
+ * To use this option, the map reduce user will need access to read the Accumulo directory in HDFS.
+ *
+ * <p>
+ * Reading the offline table will create the scan time iterator stack in the map process. So any iterators that are configured for the table will need to be
+ * on the mapper's classpath.
+ *
+ * <p>
+ * One way to use this feature is to clone a table, take the clone offline, and use the clone as the input table for a map reduce job. If you plan to map
+ * reduce over the data many times, it may be better to compact the table, clone it, take it offline, and use the clone for all map reduce jobs. The
+ * reason to do this is that compaction will reduce each tablet in the table to one file, and it is faster to read from one file.
+ *
+ * <p>
+ * There are two possible advantages to reading a table's files directly out of HDFS. First, you may see better read performance. Second, it will support
+ * speculative execution better. When reading an online table, speculative execution can put more load on an already slow tablet server.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.5.0
+ */
+ public static void setOfflineTableScan(JobConf job, boolean enableFeature) {
+ InputConfigurator.setOfflineTableScan(CLASS, job, enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration has the offline table scan feature enabled.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.5.0
+ * @see #setOfflineTableScan(JobConf, boolean)
+ */
+ protected static boolean isOfflineScan(JobConf job) {
+ return InputConfigurator.isOfflineScan(CLASS, job);
+ }
+
+ /**
+ * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
+ *
+ * @param job
+ * the Hadoop context for the configured job
+ * @return an Accumulo tablet locator
+ * @throws org.apache.accumulo.core.client.TableNotFoundException
+ * if the table name set on the job doesn't exist
+ * @since 1.5.0
+ * @deprecated since 1.6.0
+ */
+ @Deprecated
+ protected static TabletLocator getTabletLocator(JobConf job) throws TableNotFoundException {
+ return InputConfigurator.getTabletLocator(CLASS, job, InputConfigurator.getInputTableName(CLASS, job));
+ }
+
+ protected abstract static class RecordReaderBase<K,V> extends AbstractRecordReader<K,V> {
+
+ @Override
+ protected void setupIterators(JobConf job, Scanner scanner, String tableName, org.apache.accumulo.core.client.mapred.RangeInputSplit split) {
+ List<IteratorSetting> iterators = null;
+
+ if (null == split) {
+ iterators = getIterators(job);
+ } else {
+ iterators = split.getIterators();
+ if (null == iterators) {
+ iterators = getIterators(job);
+ }
+ }
+
+ setupIterators(iterators, scanner);
+ }
+
+ /**
+ * Apply the configured iterators to the scanner.
+ *
+ * @param iterators
+ * the iterators to set
+ * @param scanner
+ * the scanner to configure
+ */
+ protected void setupIterators(List<IteratorSetting> iterators, Scanner scanner) {
+ for (IteratorSetting iterator : iterators) {
+ scanner.addScanIterator(iterator);
+ }
+ }
+
+ /**
+ * Apply the configured iterators from the configuration to the scanner.
+ *
+ * @param job
+ * the job configuration
+ * @param scanner
+ * the scanner to configure
+ */
+ @Deprecated
+ protected void setupIterators(JobConf job, Scanner scanner) {
+ setupIterators(getIterators(job), scanner);
+ }
+ }
+
+ /**
+ * @deprecated since 1.5.2; Use {@link org.apache.accumulo.core.client.mapred.RangeInputSplit} instead.
+ * @see org.apache.accumulo.core.client.mapred.RangeInputSplit
+ */
+ @Deprecated
+ public static class RangeInputSplit extends org.apache.accumulo.core.client.mapred.RangeInputSplit {
+ public RangeInputSplit() {
+ super();
+ }
+
+ public RangeInputSplit(RangeInputSplit other) throws IOException {
+ super(other);
+ }
+
+ public RangeInputSplit(String table, String tableId, Range range, String[] locations) {
+ super(table, tableId, range, locations);
+ }
+
+ protected RangeInputSplit(String table, Range range, String[] locations) {
+ super(table, "", range, locations);
+ }
+ }
+}
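
For orientation, here is a minimal driver-side sketch of how the configurators above could be used, assuming they are inherited by org.apache.accumulo.core.client.mapred.AccumuloInputFormat as elsewhere in this module; the iterator name, priority, and regex are illustrative only and not part of this commit:

    import org.apache.accumulo.core.client.IteratorSetting;
    import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
    import org.apache.accumulo.core.iterators.user.RegExFilter;
    import org.apache.hadoop.mapred.JobConf;

    public class MapredScanOptionsSketch {
      public static void configure(JobConf job) {
        // attach a server-side iterator to every scanner this job creates
        IteratorSetting regex = new IteratorSetting(50, "rowFilter", RegExFilter.class);
        regex.addOption(RegExFilter.ROW_REGEX, "row_.*"); // illustrative option
        AccumuloInputFormat.addIterator(job, regex);

        // defaults shown explicitly: merge/split ranges on tablet boundaries,
        // no isolation, iterators run server side, scan the online table
        AccumuloInputFormat.setAutoAdjustRanges(job, true);
        AccumuloInputFormat.setScanIsolation(job, false);
        AccumuloInputFormat.setLocalIterators(job, false);
        AccumuloInputFormat.setOfflineTableScan(job, false);
      }
    }
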
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/RangeInputSplit.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/RangeInputSplit.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/RangeInputSplit.java
new file mode 100644
index 0000000..3fd2ab0
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapred/RangeInputSplit.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred;
+
+import java.io.IOException;
+
+import org.apache.accumulo.core.data.Range;
+import org.apache.hadoop.mapred.InputSplit;
+
+/**
+ * The Class RangeInputSplit. Encapsulates an Accumulo range for use in Map Reduce jobs.
+ */
+public class RangeInputSplit extends org.apache.accumulo.core.client.mapreduce.RangeInputSplit implements InputSplit {
+
+ public RangeInputSplit() {
+ super();
+ }
+
+ public RangeInputSplit(RangeInputSplit split) throws IOException {
+ super(split);
+ }
+
+ protected RangeInputSplit(String table, String tableId, Range range, String[] locations) {
+ super(table, tableId, range, locations);
+ }
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
new file mode 100644
index 0000000..836cff9
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
@@ -0,0 +1,672 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.ClientSideIteratorScanner;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.IsolatedScanner;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableDeletedException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.TableOfflineException;
+import org.apache.accumulo.core.client.impl.OfflineScanner;
+import org.apache.accumulo.core.client.impl.ScannerImpl;
+import org.apache.accumulo.core.client.impl.Tables;
+import org.apache.accumulo.core.client.impl.TabletLocator;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.KeyExtent;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.master.state.tables.TableState;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.security.Credentials;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.accumulo.core.util.UtilWaitThread;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+/**
+ * An abstract input format to provide shared methods common to all other input format classes. At the very least, any classes inheriting from this class will
+ * need to define their own {@link RecordReader}.
+ */
+public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
+
+ protected static final Class<?> CLASS = AccumuloInputFormat.class;
+ protected static final Logger log = Logger.getLogger(CLASS);
+
+ /**
+ * Sets the connector information needed to communicate with Accumulo in this job.
+ *
+ * <p>
+ * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe
+ * conversion to a string, and is not intended to be secure.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param principal
+ * a valid Accumulo user name (user must have Table.CREATE permission)
+ * @param token
+ * the user's password
+ * @since 1.5.0
+ */
+ public static void setConnectorInfo(Job job, String principal, AuthenticationToken token) throws AccumuloSecurityException {
+ InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
+ }
+
+ /**
+ * Sets the connector information needed to communicate with Accumulo in this job.
+ *
+ * <p>
+ * Stores the password in a file in HDFS and pulls that into the Distributed Cache in an attempt to be more secure than storing it in the Configuration.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param principal
+ * a valid Accumulo user name (user must have Table.CREATE permission)
+ * @param tokenFile
+ * the path to the token file
+ * @since 1.6.0
+ */
+ public static void setConnectorInfo(Job job, String principal, String tokenFile) throws AccumuloSecurityException {
+ InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, tokenFile);
+ }
+
+ /**
+ * Determines if the connector has been configured.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return true if the connector has been configured, false otherwise
+ * @since 1.5.0
+ * @see #setConnectorInfo(Job, String, AuthenticationToken)
+ */
+ protected static Boolean isConnectorInfoSet(JobContext context) {
+ return InputConfigurator.isConnectorInfoSet(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Gets the user name from the configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the user name
+ * @since 1.5.0
+ * @see #setConnectorInfo(Job, String, AuthenticationToken)
+ */
+ protected static String getPrincipal(JobContext context) {
+ return InputConfigurator.getPrincipal(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Gets the serialized token class from either the configuration or the token file.
+ *
+ * @since 1.5.0
+ * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobContext)} instead.
+ */
+ @Deprecated
+ protected static String getTokenClass(JobContext context) {
+ return getAuthenticationToken(context).getClass().getName();
+ }
+
+ /**
+ * Gets the serialized token from either the configuration or the token file.
+ *
+ * @since 1.5.0
+ * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobContext)} instead.
+ */
+ @Deprecated
+ protected static byte[] getToken(JobContext context) {
+ return AuthenticationToken.AuthenticationTokenSerializer.serialize(getAuthenticationToken(context));
+ }
+
+ /**
+ * Gets the authenticated token from either the specified token file or directly from the configuration, whichever was used when the job was configured.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the principal's authentication token
+ * @since 1.6.0
+ * @see #setConnectorInfo(Job, String, AuthenticationToken)
+ * @see #setConnectorInfo(Job, String, String)
+ */
+ protected static AuthenticationToken getAuthenticationToken(JobContext context) {
+ return InputConfigurator.getAuthenticationToken(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Configures a {@link org.apache.accumulo.core.client.ZooKeeperInstance} for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param clientConfig
+ * client configuration containing connection options
+ * @since 1.6.0
+ */
+ public static void setZooKeeperInstance(Job job, ClientConfiguration clientConfig) {
+ InputConfigurator.setZooKeeperInstance(CLASS, job.getConfiguration(), clientConfig);
+ }
+
+ /**
+ * Configures a {@link org.apache.accumulo.core.client.mock.MockInstance} for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param instanceName
+ * the Accumulo instance name
+ * @since 1.5.0
+ */
+ public static void setMockInstance(Job job, String instanceName) {
+ InputConfigurator.setMockInstance(CLASS, job.getConfiguration(), instanceName);
+ }
+
+ /**
+ * Initializes an Accumulo {@link org.apache.accumulo.core.client.Instance} based on the configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return an Accumulo instance
+ * @since 1.5.0
+ * @see #setZooKeeperInstance(Job, ClientConfiguration)
+ * @see #setMockInstance(Job, String)
+ */
+ protected static Instance getInstance(JobContext context) {
+ return InputConfigurator.getInstance(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Sets the log level for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param level
+ * the logging level
+ * @since 1.5.0
+ */
+ public static void setLogLevel(Job job, Level level) {
+ InputConfigurator.setLogLevel(CLASS, job.getConfiguration(), level);
+ }
+
+ /**
+ * Gets the log level from this configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the log level
+ * @since 1.5.0
+ * @see #setLogLevel(Job, Level)
+ */
+ protected static Level getLogLevel(JobContext context) {
+ return InputConfigurator.getLogLevel(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Sets the {@link org.apache.accumulo.core.security.Authorizations} used to scan. Must be a subset of the user's authorizations. Defaults to the empty set.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param auths
+ * the user's authorizations
+ */
+ public static void setScanAuthorizations(Job job, Authorizations auths) {
+ InputConfigurator.setScanAuthorizations(CLASS, job.getConfiguration(), auths);
+ }
+
+ /**
+ * Gets the authorizations to set for the scans from the configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the Accumulo scan authorizations
+ * @since 1.5.0
+ * @see #setScanAuthorizations(Job, Authorizations)
+ */
+ protected static Authorizations getScanAuthorizations(JobContext context) {
+ return InputConfigurator.getScanAuthorizations(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Fetches all {@link InputTableConfig}s that have been set on the given job.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the {@link InputTableConfig} objects for the job
+ * @since 1.6.0
+ */
+ protected static Map<String,InputTableConfig> getInputTableConfigs(JobContext context) {
+ return InputConfigurator.getInputTableConfigs(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Fetches a {@link InputTableConfig} that has been set on the configuration for a specific table.
+ *
+ * <p>
+ * null is returned in the event that the table doesn't exist.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @param tableName
+ * the table name for which to grab the config object
+ * @return the {@link InputTableConfig} for the given table
+ * @since 1.6.0
+ */
+ protected static InputTableConfig getInputTableConfig(JobContext context, String tableName) {
+ return InputConfigurator.getInputTableConfig(CLASS, getConfiguration(context), tableName);
+ }
+
+ /**
+ * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @param table
+ * the table for which to initialize the locator
+ * @return an Accumulo tablet locator
+ * @throws org.apache.accumulo.core.client.TableNotFoundException
+ * if the table name set on the configuration doesn't exist
+ * @since 1.6.0
+ */
+ protected static TabletLocator getTabletLocator(JobContext context, String table) throws TableNotFoundException {
+ return InputConfigurator.getTabletLocator(CLASS, getConfiguration(context), table);
+ }
+
+ // InputFormat doesn't have the equivalent of OutputFormat's checkOutputSpecs(JobContext job)
+ /**
+ * Check whether a configuration is fully configured to be used with an Accumulo {@link org.apache.hadoop.mapreduce.InputFormat}.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @throws java.io.IOException
+ * if the context is improperly configured
+ * @since 1.5.0
+ */
+ protected static void validateOptions(JobContext context) throws IOException {
+ InputConfigurator.validateOptions(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * An abstract base class to be used to create {@link org.apache.hadoop.mapreduce.RecordReader} instances that convert from Accumulo
+ * {@link org.apache.accumulo.core.data.Key}/{@link org.apache.accumulo.core.data.Value} pairs to the user's K/V types.
+ *
+ * Subclasses must implement {@link #nextKeyValue()} and use it to update the following variables:
+ * <ul>
+ * <li>K {@link #currentK}</li>
+ * <li>V {@link #currentV}</li>
+ * <li>Key {@link #currentKey} (used for progress reporting)</li>
+ * <li>int {@link #numKeysRead} (used for progress reporting)</li>
+ * </ul>
+ */
+ protected abstract static class AbstractRecordReader<K,V> extends RecordReader<K,V> {
+ protected long numKeysRead;
+ protected Iterator<Map.Entry<Key,Value>> scannerIterator;
+ protected RangeInputSplit split;
+
+ /**
+ * Configures the iterators on a scanner for the given table name.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @param scanner
+ * the scanner for which to configure the iterators
+ * @param tableName
+ * the table name for which the scanner is configured
+ * @since 1.6.0
+ */
+ protected abstract void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName, RangeInputSplit split);
+
+ /**
+ * Initialize a scanner over the given input split using this task attempt configuration.
+ */
+ @Override
+ public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
+
+ Scanner scanner;
+ split = (RangeInputSplit) inSplit;
+ log.debug("Initializing input split: " + split.getRange());
+
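+ // prefer values carried on the split; fall back to the job configuration when the split does not override them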
+ Instance instance = split.getInstance();
+ if (null == instance) {
+ instance = getInstance(attempt);
+ }
+
+ String principal = split.getPrincipal();
+ if (null == principal) {
+ principal = getPrincipal(attempt);
+ }
+
+ AuthenticationToken token = split.getToken();
+ if (null == token) {
+ token = getAuthenticationToken(attempt);
+ }
+
+ Authorizations authorizations = split.getAuths();
+ if (null == authorizations) {
+ authorizations = getScanAuthorizations(attempt);
+ }
+
+ String table = split.getTableName();
+
+ // in case the table name changed, we can still use the previous name in terms of configuration,
+ // but the scanner will use the table id resolved at job setup time
+ InputTableConfig tableConfig = getInputTableConfig(attempt, split.getTableName());
+
+ Boolean isOffline = split.isOffline();
+ if (null == isOffline) {
+ isOffline = tableConfig.isOfflineScan();
+ }
+
+ Boolean isIsolated = split.isIsolatedScan();
+ if (null == isIsolated) {
+ isIsolated = tableConfig.shouldUseIsolatedScanners();
+ }
+
+ Boolean usesLocalIterators = split.usesLocalIterators();
+ if (null == usesLocalIterators) {
+ usesLocalIterators = tableConfig.shouldUseLocalIterators();
+ }
+
+ List<IteratorSetting> iterators = split.getIterators();
+ if (null == iterators) {
+ iterators = tableConfig.getIterators();
+ }
+
+ Collection<Pair<Text,Text>> columns = split.getFetchedColumns();
+ if (null == columns) {
+ columns = tableConfig.getFetchedColumns();
+ }
+
+ try {
+ log.debug("Creating connector with user: " + principal);
+ log.debug("Creating scanner for table: " + table);
+ log.debug("Authorizations are: " + authorizations);
+ if (isOffline) {
+ scanner = new OfflineScanner(instance, new Credentials(principal, token), split.getTableId(), authorizations);
+ } else if (instance instanceof MockInstance) {
+ scanner = instance.getConnector(principal, token).createScanner(split.getTableName(), authorizations);
+ } else {
+ scanner = new ScannerImpl(instance, new Credentials(principal, token), split.getTableId(), authorizations);
+ }
+ if (isIsolated) {
+ log.info("Creating isolated scanner");
+ scanner = new IsolatedScanner(scanner);
+ }
+ if (usesLocalIterators) {
+ log.info("Using local iterators");
+ scanner = new ClientSideIteratorScanner(scanner);
+ }
+
+ setupIterators(attempt, scanner, split.getTableName(), split);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+
+ // setup a scanner within the bounds of this split
+ for (Pair<Text,Text> c : columns) {
+ if (c.getSecond() != null) {
+ log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
+ scanner.fetchColumn(c.getFirst(), c.getSecond());
+ } else {
+ log.debug("Fetching column family " + c.getFirst());
+ scanner.fetchColumnFamily(c.getFirst());
+ }
+ }
+
+ scanner.setRange(split.getRange());
+ numKeysRead = 0;
+
+ // do this last after setting all scanner options
+ scannerIterator = scanner.iterator();
+ }
+
+ @Override
+ public void close() {}
+
+ @Override
+ public float getProgress() throws IOException {
+ if (numKeysRead > 0 && currentKey == null)
+ return 1.0f;
+ return split.getProgress(currentKey);
+ }
+
+ /**
+ * The Key that should be returned to the client
+ */
+ protected K currentK = null;
+
+ /**
+ * The Value that should be returned to the client
+ */
+ protected V currentV = null;
+
+ /**
+ * The Key that is used to determine progress in the current InputSplit. It is not returned to the client and is only used internally
+ */
+ protected Key currentKey = null;
+
+ @Override
+ public K getCurrentKey() throws IOException, InterruptedException {
+ return currentK;
+ }
+
+ @Override
+ public V getCurrentValue() throws IOException, InterruptedException {
+ return currentV;
+ }
+ }
+
+ Map<String,Map<KeyExtent,List<Range>>> binOfflineTable(JobContext context, String tableId, List<Range> ranges) throws TableNotFoundException,
+ AccumuloException, AccumuloSecurityException {
+
+ Instance instance = getInstance(context);
+ Connector conn = instance.getConnector(getPrincipal(context), getAuthenticationToken(context));
+
+ return InputConfigurator.binOffline(tableId, ranges, instance, conn);
+ }
+
+ /**
+ * Gets the splits of the tables that have been set on the job.
+ *
+ * @param context
+ * the configuration of the job
+ * @return the splits from the tables based on the ranges.
+ * @throws java.io.IOException
+ * if a table set on the job doesn't exist or an error occurs initializing the tablet locator
+ */
+ @Override
+ public List<InputSplit> getSplits(JobContext context) throws IOException {
+ Level logLevel = getLogLevel(context);
+ log.setLevel(logLevel);
+ validateOptions(context);
+ Random random = new Random();
+ LinkedList<InputSplit> splits = new LinkedList<InputSplit>();
+ Map<String,InputTableConfig> tableConfigs = getInputTableConfigs(context);
+ for (Map.Entry<String,InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
+
+ String tableName = tableConfigEntry.getKey();
+ InputTableConfig tableConfig = tableConfigEntry.getValue();
+
+ Instance instance = getInstance(context);
+ boolean mockInstance;
+ String tableId;
+ // resolve table name to id once, and use id from this point forward
+ if (instance instanceof MockInstance) {
+ tableId = "";
+ mockInstance = true;
+ } else {
+ try {
+ tableId = Tables.getTableId(instance, tableName);
+ } catch (TableNotFoundException e) {
+ throw new IOException(e);
+ }
+ mockInstance = false;
+ }
+
+ Authorizations auths = getScanAuthorizations(context);
+ String principal = getPrincipal(context);
+ AuthenticationToken token = getAuthenticationToken(context);
+
+ boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
+ List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
+ if (ranges.isEmpty()) {
+ ranges = new ArrayList<Range>(1);
+ ranges.add(new Range());
+ }
+
+ // get the metadata information for these ranges
+ Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<String,Map<KeyExtent,List<Range>>>();
+ TabletLocator tl;
+ try {
+ if (tableConfig.isOfflineScan()) {
+ binnedRanges = binOfflineTable(context, tableId, ranges);
+ while (binnedRanges == null) {
+ // Some tablets were still online, try again
+ UtilWaitThread.sleep(100 + random.nextInt(100)); // sleep randomly between 100 and 200 ms
+ binnedRanges = binOfflineTable(context, tableId, ranges);
+
+ }
+ } else {
+ tl = getTabletLocator(context, tableId);
+ // it's possible that the cache could contain complete, but old information about a table's tablets... so clear it
+ tl.invalidateCache();
+ Credentials creds = new Credentials(getPrincipal(context), getAuthenticationToken(context));
+
+ while (!tl.binRanges(creds, ranges, binnedRanges).isEmpty()) {
+ if (!(instance instanceof MockInstance)) {
+ if (!Tables.exists(instance, tableId))
+ throw new TableDeletedException(tableId);
+ if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
+ throw new TableOfflineException(instance, tableId);
+ }
+ binnedRanges.clear();
+ log.warn("Unable to locate bins for specified ranges. Retrying.");
+ UtilWaitThread.sleep(100 + random.nextInt(100)); // sleep randomly between 100 and 200 ms
+ tl.invalidateCache();
+ }
+ }
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+
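+ // when auto-adjust is disabled, each configured range becomes exactly one split; collect its candidate locations here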
+ HashMap<Range,ArrayList<String>> splitsToAdd = null;
+
+ if (!autoAdjust)
+ splitsToAdd = new HashMap<Range,ArrayList<String>>();
+
+ HashMap<String,String> hostNameCache = new HashMap<String,String>();
+ for (Map.Entry<String,Map<KeyExtent,List<Range>>> tserverBin : binnedRanges.entrySet()) {
+ String ip = tserverBin.getKey().split(":", 2)[0];
+ String location = hostNameCache.get(ip);
+ if (location == null) {
+ InetAddress inetAddress = InetAddress.getByName(ip);
+ location = inetAddress.getCanonicalHostName();
+ hostNameCache.put(ip, location);
+ }
+ for (Map.Entry<KeyExtent,List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
+ Range ke = extentRanges.getKey().toDataRange();
+ for (Range r : extentRanges.getValue()) {
+ if (autoAdjust) {
+ // divide ranges into smaller ranges, based on the tablets
+ RangeInputSplit split = new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
+
+ split.setOffline(tableConfig.isOfflineScan());
+ split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
+ split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
+ split.setMockInstance(mockInstance);
+ split.setFetchedColumns(tableConfig.getFetchedColumns());
+ split.setPrincipal(principal);
+ split.setToken(token);
+ split.setInstanceName(instance.getInstanceName());
+ split.setZooKeepers(instance.getZooKeepers());
+ split.setAuths(auths);
+ split.setIterators(tableConfig.getIterators());
+ split.setLogLevel(logLevel);
+
+ splits.add(split);
+ } else {
+ // don't divide ranges
+ ArrayList<String> locations = splitsToAdd.get(r);
+ if (locations == null)
+ locations = new ArrayList<String>(1);
+ locations.add(location);
+ splitsToAdd.put(r, locations);
+ }
+ }
+ }
+ }
+
+ if (!autoAdjust)
+ for (Map.Entry<Range,ArrayList<String>> entry : splitsToAdd.entrySet()) {
+ RangeInputSplit split = new RangeInputSplit(tableName, tableId, entry.getKey(), entry.getValue().toArray(new String[0]));
+
+ split.setOffline(tableConfig.isOfflineScan());
+ split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
+ split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
+ split.setMockInstance(mockInstance);
+ split.setFetchedColumns(tableConfig.getFetchedColumns());
+ split.setPrincipal(principal);
+ split.setToken(token);
+ split.setInstanceName(instance.getInstanceName());
+ split.setZooKeepers(instance.getZooKeepers());
+ split.setAuths(auths);
+ split.setIterators(tableConfig.getIterators());
+ split.setLogLevel(logLevel);
+
+ splits.add(split);
+ }
+ }
+ return splits;
+ }
+
+ // use reflection to pull the Configuration out of the JobContext for Hadoop 1 and Hadoop 2 compatibility
+ static Configuration getConfiguration(JobContext context) {
+ try {
+ Class<?> c = AbstractInputFormat.class.getClassLoader().loadClass("org.apache.hadoop.mapreduce.JobContext");
+ Method m = c.getMethod("getConfiguration");
+ Object o = m.invoke(context, new Object[0]);
+ return (Configuration) o;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
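
As a rough sketch of how driver code might exercise the static configurators above (the user name, token, instance name, and ZooKeeper host below are placeholders, and the concrete AccumuloInputFormat subclass is the one added later in this commit):

    import org.apache.accumulo.core.client.AccumuloSecurityException;
    import org.apache.accumulo.core.client.ClientConfiguration;
    import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
    import org.apache.accumulo.core.client.security.tokens.PasswordToken;
    import org.apache.accumulo.core.security.Authorizations;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.log4j.Level;

    public class InputDriverSketch {
      public static void configure(Job job) throws AccumuloSecurityException {
        // the token is serialized into the job configuration (see the warning above)
        AccumuloInputFormat.setConnectorInfo(job, "reader", new PasswordToken("secret"));
        // connection options come from a ClientConfiguration rather than bare strings
        AccumuloInputFormat.setZooKeeperInstance(job,
            ClientConfiguration.loadDefault().withInstance("myinstance").withZkHosts("zkhost:2181"));
        AccumuloInputFormat.setScanAuthorizations(job, new Authorizations("public"));
        AccumuloInputFormat.setLogLevel(job, Level.INFO);
      }
    }

The same pattern applies to any subclass of AbstractInputFormat, since the configurators only write into the job's Configuration.
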
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
new file mode 100644
index 0000000..196fb04
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.ArrayByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.file.FileOperations;
+import org.apache.accumulo.core.file.FileSKVWriter;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.commons.collections.map.LRUMap;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.log4j.Logger;
+
+/**
+ * This class allows MapReduce jobs to write output in the Accumulo data file format.<br />
+ * Care should be taken to write only sorted data (sorted by {@link Key}), as this is an important requirement of Accumulo data files.
+ *
+ * <p>
+ * The output path to be created must be specified via {@link AccumuloFileOutputFormat#setOutputPath(Job, Path)}. This is inherited from
+ * {@link FileOutputFormat#setOutputPath(Job, Path)}. Other methods from {@link FileOutputFormat} are not supported and may be ignored or cause failures. Using
+ * other Hadoop configuration options that affect the behavior of the underlying files directly in the Job's configuration may work, but are not directly
+ * supported at this time.
+ */
+public class AccumuloFileOutputFormat extends FileOutputFormat<Key,Value> {
+
+ private static final Class<?> CLASS = AccumuloFileOutputFormat.class;
+ protected static final Logger log = Logger.getLogger(CLASS);
+
+ /**
+ * This helper method provides an AccumuloConfiguration object constructed from the Accumulo defaults, and overridden with Accumulo properties that have been
+ * stored in the Job's configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @since 1.5.0
+ */
+ protected static AccumuloConfiguration getAccumuloConfiguration(JobContext context) {
+ return FileOutputConfigurator.getAccumuloConfiguration(CLASS, InputFormatBase.getConfiguration(context));
+ }
+
+ /**
+ * Sets the compression type to use for data blocks. Specifying a compression type may require additional libraries to be available to your Job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param compressionType
+ * one of "none", "gz", "lzo", or "snappy"
+ * @since 1.5.0
+ */
+ public static void setCompressionType(Job job, String compressionType) {
+ FileOutputConfigurator.setCompressionType(CLASS, job.getConfiguration(), compressionType);
+ }
+
+ /**
+ * Sets the size for data blocks within each file.<br />
+ * Data blocks are a span of key/value pairs stored in the file that are compressed and indexed as a group.
+ *
+ * <p>
+ * Making this value smaller may increase seek performance, but at the cost of increasing the size of the indexes (which can also affect seek performance).
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param dataBlockSize
+ * the block size, in bytes
+ * @since 1.5.0
+ */
+ public static void setDataBlockSize(Job job, long dataBlockSize) {
+ FileOutputConfigurator.setDataBlockSize(CLASS, job.getConfiguration(), dataBlockSize);
+ }
+
+ /**
+ * Sets the size for file blocks in the file system; file blocks are managed, and replicated, by the underlying file system.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param fileBlockSize
+ * the block size, in bytes
+ * @since 1.5.0
+ */
+ public static void setFileBlockSize(Job job, long fileBlockSize) {
+ FileOutputConfigurator.setFileBlockSize(CLASS, job.getConfiguration(), fileBlockSize);
+ }
+
+ /**
+ * Sets the size for index blocks within each file; smaller blocks mean a deeper index hierarchy within the file, while larger blocks mean a shallower
+ * index hierarchy within the file. This can affect the performance of queries.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param indexBlockSize
+ * the block size, in bytes
+ * @since 1.5.0
+ */
+ public static void setIndexBlockSize(Job job, long indexBlockSize) {
+ FileOutputConfigurator.setIndexBlockSize(CLASS, job.getConfiguration(), indexBlockSize);
+ }
+
+ /**
+ * Sets the file system replication factor for the resulting file, overriding the file system default.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param replication
+ * the number of replicas for produced files
+ * @since 1.5.0
+ */
+ public static void setReplication(Job job, int replication) {
+ FileOutputConfigurator.setReplication(CLASS, job.getConfiguration(), replication);
+ }
+
+ @Override
+ public RecordWriter<Key,Value> getRecordWriter(TaskAttemptContext context) throws IOException {
+ // get the path of the temporary output file
+ final Configuration conf = InputFormatBase.getConfiguration(context);
+ final AccumuloConfiguration acuConf = getAccumuloConfiguration(context);
+
+ final String extension = acuConf.get(Property.TABLE_FILE_TYPE);
+ final Path file = this.getDefaultWorkFile(context, "." + extension);
+
+ final LRUMap validVisibilities = new LRUMap(1000);
+
+ return new RecordWriter<Key,Value>() {
+ FileSKVWriter out = null;
+
+ @Override
+ public void close(TaskAttemptContext context) throws IOException {
+ if (out != null)
+ out.close();
+ }
+
+ @Override
+ public void write(Key key, Value value) throws IOException {
+
+ Boolean wasChecked = (Boolean) validVisibilities.get(key.getColumnVisibilityData());
+ if (wasChecked == null) {
+ byte[] cv = key.getColumnVisibilityData().toArray();
+ new ColumnVisibility(cv);
+ validVisibilities.put(new ArrayByteSequence(Arrays.copyOf(cv, cv.length)), Boolean.TRUE);
+ }
+
+ if (out == null) {
+ out = FileOperations.getInstance().openWriter(file.toString(), file.getFileSystem(conf), conf, acuConf);
+ out.startDefaultLocalityGroup();
+ }
+ out.append(key, value);
+ }
+ };
+ }
+
+}
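
A hedged sketch of typical driver configuration for this output format; the output path and block sizes are arbitrary examples, and setOutputPath is the method inherited from FileOutputFormat that the class javadoc above refers to:

    import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;

    public class FileOutputSketch {
      public static void configure(Job job) {
        job.setOutputFormatClass(AccumuloFileOutputFormat.class);
        // required: where the sorted Accumulo data files are written (inherited from FileOutputFormat)
        AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp/bulk-output"));
        // optional tuning, mirroring the setters defined above
        AccumuloFileOutputFormat.setCompressionType(job, "gz");      // "none", "gz", "lzo", or "snappy"
        AccumuloFileOutputFormat.setDataBlockSize(job, 256 * 1024);  // bytes
        AccumuloFileOutputFormat.setIndexBlockSize(job, 128 * 1024); // bytes
        AccumuloFileOutputFormat.setReplication(job, 3);
      }
    }
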
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
new file mode 100644
index 0000000..21a0280
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.IOException;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.format.DefaultFormatter;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.log4j.Level;
+
+/**
+ * This class allows MapReduce jobs to use Accumulo as the source of data. This {@link InputFormat} provides keys and values of type {@link Key} and
+ * {@link Value} to the Map function.
+ *
+ * The user must specify the following via static configurator methods:
+ *
+ * <ul>
+ * <li>{@link AccumuloInputFormat#setConnectorInfo(Job, String, AuthenticationToken)}
+ * <li>{@link AccumuloInputFormat#setScanAuthorizations(Job, Authorizations)}
+ * <li>{@link AccumuloInputFormat#setZooKeeperInstance(Job, ClientConfiguration)} OR {@link AccumuloInputFormat#setMockInstance(Job, String)}
+ * </ul>
+ *
+ * Other static methods are optional.
+ */
+public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
+
+ @Override
+ public RecordReader<Key,Value> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
+ log.setLevel(getLogLevel(context));
+
+ // Override the log level from the configuration; if the RangeInputSplit carries one, that is the more correct one to use.
+ if (split instanceof org.apache.accumulo.core.client.mapreduce.RangeInputSplit) {
+ org.apache.accumulo.core.client.mapreduce.RangeInputSplit risplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) split;
+ Level level = risplit.getLogLevel();
+ if (null != level) {
+ log.setLevel(level);
+ }
+ }
+
+ return new RecordReaderBase<Key,Value>() {
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ if (scannerIterator.hasNext()) {
+ ++numKeysRead;
+ Entry<Key,Value> entry = scannerIterator.next();
+ currentK = currentKey = entry.getKey();
+ currentV = entry.getValue();
+ if (log.isTraceEnabled())
+ log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
+ return true;
+ }
+ return false;
+ }
+ };
+ }
+}
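
To tie the required configurators from the class javadoc together, here is a sketch of a read-side job setup; it assumes the setInputTableName and setRanges configurators from InputFormatBase in this same module, uses a mock instance for brevity, and the table name, range, and mapper are placeholders:

    import java.util.Collections;

    import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
    import org.apache.accumulo.core.client.security.tokens.PasswordToken;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.security.Authorizations;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;

    public class ScanJobSketch {
      // mappers receive Key/Value pairs straight from the record reader above
      public static class PassThroughMapper extends Mapper<Key,Value,Text,Text> {}

      public static void configure(Job job) throws Exception {
        job.setInputFormatClass(AccumuloInputFormat.class);
        job.setMapperClass(PassThroughMapper.class);
        AccumuloInputFormat.setConnectorInfo(job, "reader", new PasswordToken("secret"));
        AccumuloInputFormat.setMockInstance(job, "testInstance"); // or setZooKeeperInstance(...) for a real cluster
        AccumuloInputFormat.setScanAuthorizations(job, Authorizations.EMPTY);
        AccumuloInputFormat.setInputTableName(job, "mytable");
        AccumuloInputFormat.setRanges(job, Collections.singleton(new Range("a", "m")));
      }
    }
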
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
new file mode 100644
index 0000000..af1001f
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.format.DefaultFormatter;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * This class allows MapReduce jobs to use multiple Accumulo tables as the source of data. This {@link org.apache.hadoop.mapreduce.InputFormat} provides keys
+ * and values of type {@link Key} and {@link Value} to the Map function.
+ *
+ * The user must specify the following via static configurator methods:
+ *
+ * <ul>
+ * <li>{@link AccumuloMultiTableInputFormat#setConnectorInfo(Job, String, AuthenticationToken)}
+ * <li>{@link AccumuloMultiTableInputFormat#setScanAuthorizations(Job, Authorizations)}
+ * <li>{@link AccumuloMultiTableInputFormat#setZooKeeperInstance(Job, ClientConfiguration)} OR {@link AccumuloInputFormat#setMockInstance(Job, String)}
+ * <li>{@link AccumuloMultiTableInputFormat#setInputTableConfigs(Job, Map)}
+ * </ul>
+ *
+ * Other static methods are optional.
+ */
+public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value> {
+
+ /**
+ * Sets the {@link InputTableConfig} objects on the given Hadoop configuration.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param configs
+ * the table query configs to be set on the configuration.
+ * @since 1.6.0
+ */
+ public static void setInputTableConfigs(Job job, Map<String,InputTableConfig> configs) {
+ checkNotNull(configs);
+ InputConfigurator.setInputTableConfigs(CLASS, getConfiguration(job), configs);
+ }
+
+ @Override
+ public RecordReader<Key,Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
+ log.setLevel(getLogLevel(context));
+ return new AbstractRecordReader<Key,Value>() {
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ if (scannerIterator.hasNext()) {
+ ++numKeysRead;
+ Map.Entry<Key,Value> entry = scannerIterator.next();
+ currentK = currentKey = entry.getKey();
+ currentV = entry.getValue();
+ if (log.isTraceEnabled())
+ log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName, RangeInputSplit split) {
+ List<IteratorSetting> iterators = split.getIterators();
+ if (null == iterators) {
+ iterators = getInputTableConfig(context, tableName).getIterators();
+ }
+
+ for (IteratorSetting setting : iterators) {
+ scanner.addScanIterator(setting);
+ }
+ }
+ };
+ }
+}
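
A sketch of configuring two tables via setInputTableConfigs; it assumes InputTableConfig exposes setRanges and setIterators setters matching the getters read in AbstractInputFormat.getSplits, and the table names, range, and iterator are illustrative. Connector, instance, and authorization setup would be the same as in the earlier sketches.

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.accumulo.core.client.IteratorSetting;
    import org.apache.accumulo.core.client.mapreduce.AccumuloMultiTableInputFormat;
    import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.iterators.user.VersioningIterator;
    import org.apache.hadoop.mapreduce.Job;

    public class MultiTableSketch {
      public static void configure(Job job) {
        InputTableConfig usersCfg = new InputTableConfig();
        usersCfg.setRanges(Collections.singletonList(new Range("a", "m")));
        usersCfg.setIterators(Collections.singletonList(
            new IteratorSetting(60, "vers", VersioningIterator.class)));

        Map<String,InputTableConfig> configs = new HashMap<String,InputTableConfig>();
        configs.put("users", usersCfg);
        configs.put("events", new InputTableConfig()); // defaults: full-table scan
        AccumuloMultiTableInputFormat.setInputTableConfigs(job, configs);
      }
    }
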
[05/12] ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormat.java
new file mode 100644
index 0000000..af9bbae
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormat.java
@@ -0,0 +1,545 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.MultiTableBatchWriter;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.OutputConfigurator;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.SecurityErrorCode;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
+import org.apache.accumulo.core.data.ColumnUpdate;
+import org.apache.accumulo.core.data.KeyExtent;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+/**
+ * This class allows MapReduce jobs to use Accumulo as the sink for data. This {@link OutputFormat} accepts keys and values of type {@link Text} (for a table
+ * name) and {@link Mutation} from the Map and Reduce functions.
+ *
+ * The user must specify the following via static configurator methods:
+ *
+ * <ul>
+ * <li>{@link AccumuloOutputFormat#setConnectorInfo(Job, String, AuthenticationToken)}
+ * <li>{@link AccumuloOutputFormat#setConnectorInfo(Job, String, String)}
+ * <li>{@link AccumuloOutputFormat#setZooKeeperInstance(Job, ClientConfiguration)} OR {@link AccumuloOutputFormat#setMockInstance(Job, String)}
+ * </ul>
+ *
+ * Other static methods are optional.
+ */
+public class AccumuloOutputFormat extends OutputFormat<Text,Mutation> {
+
+ private static final Class<?> CLASS = AccumuloOutputFormat.class;
+ protected static final Logger log = Logger.getLogger(CLASS);
+
+ /**
+ * Sets the connector information needed to communicate with Accumulo in this job.
+ *
+ * <p>
+ * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe
+ * conversion to a string, and is not intended to be secure.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param principal
+ * a valid Accumulo user name (user must have Table.CREATE permission if {@link #setCreateTables(Job, boolean)} is set to true)
+ * @param token
+ * the user's password
+ * @since 1.5.0
+ */
+ public static void setConnectorInfo(Job job, String principal, AuthenticationToken token) throws AccumuloSecurityException {
+ OutputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
+ }
+
+ /**
+ * Sets the connector information needed to communicate with Accumulo in this job.
+ *
+ * <p>
+ * Stores the password in a file in HDFS and pulls that into the Distributed Cache in an attempt to be more secure than storing it in the Configuration.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param principal
+ * a valid Accumulo user name (user must have Table.CREATE permission if {@link #setCreateTables(Job, boolean)} is set to true)
+ * @param tokenFile
+ * the path to the token file
+ * @since 1.6.0
+ */
+ public static void setConnectorInfo(Job job, String principal, String tokenFile) throws AccumuloSecurityException {
+ OutputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, tokenFile);
+ }
+
+ /**
+ * Determines if the connector has been configured.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return true if the connector has been configured, false otherwise
+ * @since 1.5.0
+ * @see #setConnectorInfo(Job, String, AuthenticationToken)
+ */
+ protected static Boolean isConnectorInfoSet(JobContext context) {
+ return OutputConfigurator.isConnectorInfoSet(CLASS, InputFormatBase.getConfiguration(context));
+ }
+
+ /**
+ * Gets the user name from the configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the user name
+ * @since 1.5.0
+ * @see #setConnectorInfo(Job, String, AuthenticationToken)
+ */
+ protected static String getPrincipal(JobContext context) {
+ return OutputConfigurator.getPrincipal(CLASS, InputFormatBase.getConfiguration(context));
+ }
+
+ /**
+ * Gets the serialized token class from either the configuration or the token file.
+ *
+ * @since 1.5.0
+ * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobContext)} instead.
+ */
+ @Deprecated
+ protected static String getTokenClass(JobContext context) {
+ return getAuthenticationToken(context).getClass().getName();
+ }
+
+ /**
+ * Gets the serialized token from either the configuration or the token file.
+ *
+ * @since 1.5.0
+ * @deprecated since 1.6.0; Use {@link #getAuthenticationToken(JobContext)} instead.
+ */
+ @Deprecated
+ protected static byte[] getToken(JobContext context) {
+ return AuthenticationTokenSerializer.serialize(getAuthenticationToken(context));
+ }
+
+ /**
+ * Gets the authenticated token from either the specified token file or directly from the configuration, whichever was used when the job was configured.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the principal's authentication token
+ * @since 1.6.0
+ * @see #setConnectorInfo(Job, String, AuthenticationToken)
+ * @see #setConnectorInfo(Job, String, String)
+ */
+ protected static AuthenticationToken getAuthenticationToken(JobContext context) {
+ return OutputConfigurator.getAuthenticationToken(CLASS, InputFormatBase.getConfiguration(context));
+ }
+
+ /**
+ * Configures a {@link ZooKeeperInstance} for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param clientConfig
+ * client configuration for specifying connection timeouts, SSL connection options, etc.
+ * @since 1.6.0
+ */
+ public static void setZooKeeperInstance(Job job, ClientConfiguration clientConfig) {
+ OutputConfigurator.setZooKeeperInstance(CLASS, job.getConfiguration(), clientConfig);
+ }
+
+ /**
+ * Configures a {@link MockInstance} for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param instanceName
+ * the Accumulo instance name
+ * @since 1.5.0
+ */
+ public static void setMockInstance(Job job, String instanceName) {
+ OutputConfigurator.setMockInstance(CLASS, job.getConfiguration(), instanceName);
+ }
+
+ /**
+ * Initializes an Accumulo {@link Instance} based on the configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return an Accumulo instance
+ * @since 1.5.0
+ * @see #setZooKeeperInstance(Job, ClientConfiguration)
+ * @see #setMockInstance(Job, String)
+ */
+ protected static Instance getInstance(JobContext context) {
+ return OutputConfigurator.getInstance(CLASS, InputFormatBase.getConfiguration(context));
+ }
+
+ /**
+ * Sets the log level for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param level
+ * the logging level
+ * @since 1.5.0
+ */
+ public static void setLogLevel(Job job, Level level) {
+ OutputConfigurator.setLogLevel(CLASS, job.getConfiguration(), level);
+ }
+
+ /**
+ * Gets the log level from this configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the log level
+ * @since 1.5.0
+ * @see #setLogLevel(Job, Level)
+ */
+ protected static Level getLogLevel(JobContext context) {
+ return OutputConfigurator.getLogLevel(CLASS, InputFormatBase.getConfiguration(context));
+ }
+
+ /**
+ * Sets the default table name to use if one emits a null in place of a table name for a given mutation. Table names may only contain alphanumeric characters
+ * and underscores.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param tableName
+ * the table to use when the tablename is null in the write call
+ * @since 1.5.0
+ */
+ public static void setDefaultTableName(Job job, String tableName) {
+ OutputConfigurator.setDefaultTableName(CLASS, job.getConfiguration(), tableName);
+ }
+
+ /**
+ * Gets the default table name from the configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the default table name
+ * @since 1.5.0
+ * @see #setDefaultTableName(Job, String)
+ */
+ protected static String getDefaultTableName(JobContext context) {
+ return OutputConfigurator.getDefaultTableName(CLASS, InputFormatBase.getConfiguration(context));
+ }
+
+ /**
+ * Sets the configuration for the job's {@link BatchWriter} instances. If not set, a new {@link BatchWriterConfig}, with sensible built-in defaults, is
+ * used. Setting the configuration multiple times overwrites any previous configuration.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param bwConfig
+ * the configuration for the {@link BatchWriter}
+ * @since 1.5.0
+ */
+ public static void setBatchWriterOptions(Job job, BatchWriterConfig bwConfig) {
+ OutputConfigurator.setBatchWriterOptions(CLASS, job.getConfiguration(), bwConfig);
+ }
+
+ /**
+ * Gets the {@link BatchWriterConfig} settings.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the configuration object
+ * @since 1.5.0
+ * @see #setBatchWriterOptions(Job, BatchWriterConfig)
+ */
+ protected static BatchWriterConfig getBatchWriterOptions(JobContext context) {
+ return OutputConfigurator.getBatchWriterOptions(CLASS, InputFormatBase.getConfiguration(context));
+ }
+
+ /**
+ * Sets the directive to create new tables, as necessary. Table names may only contain alphanumeric characters and underscores.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.5.0
+ */
+ public static void setCreateTables(Job job, boolean enableFeature) {
+ OutputConfigurator.setCreateTables(CLASS, job.getConfiguration(), enableFeature);
+ }
+
+ /**
+ * Determines whether tables are permitted to be created as needed.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.5.0
+ * @see #setCreateTables(Job, boolean)
+ */
+ protected static Boolean canCreateTables(JobContext context) {
+ return OutputConfigurator.canCreateTables(CLASS, InputFormatBase.getConfiguration(context));
+ }
+
+ /**
+ * Sets the directive to use simulation mode for this job. In simulation mode, no output is produced. This is useful for testing.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.5.0
+ */
+ public static void setSimulationMode(Job job, boolean enableFeature) {
+ OutputConfigurator.setSimulationMode(CLASS, job.getConfiguration(), enableFeature);
+ }
+
+ /**
+ * Determines whether this feature is enabled.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.5.0
+ * @see #setSimulationMode(Job, boolean)
+ */
+ protected static Boolean getSimulationMode(JobContext context) {
+ return OutputConfigurator.getSimulationMode(CLASS, InputFormatBase.getConfiguration(context));
+ }
+
+ /**
+ * A base class to be used to create {@link RecordWriter} instances that write to Accumulo.
+ */
+ protected static class AccumuloRecordWriter extends RecordWriter<Text,Mutation> {
+ private MultiTableBatchWriter mtbw = null;
+ private HashMap<Text,BatchWriter> bws = null;
+ private Text defaultTableName = null;
+
+ private boolean simulate = false;
+ private boolean createTables = false;
+
+ private long mutCount = 0;
+ private long valCount = 0;
+
+ private Connector conn;
+
+ protected AccumuloRecordWriter(TaskAttemptContext context) throws AccumuloException, AccumuloSecurityException, IOException {
+ Level l = getLogLevel(context);
+ if (l != null)
+ log.setLevel(getLogLevel(context));
+ this.simulate = getSimulationMode(context);
+ this.createTables = canCreateTables(context);
+
+ if (simulate)
+ log.info("Simulating output only. No writes to tables will occur");
+
+ this.bws = new HashMap<Text,BatchWriter>();
+
+ String tname = getDefaultTableName(context);
+ this.defaultTableName = (tname == null) ? null : new Text(tname);
+
+ if (!simulate) {
+ this.conn = getInstance(context).getConnector(getPrincipal(context), getAuthenticationToken(context));
+ mtbw = conn.createMultiTableBatchWriter(getBatchWriterOptions(context));
+ }
+ }
+
+ /**
+ * Push a mutation into a table. If table is null, the defaultTable will be used. If canCreateTable is set, the table will be created if it does not exist.
+ * The table name must only contain alphanumerics and underscore.
+ */
+ @Override
+ public void write(Text table, Mutation mutation) throws IOException {
+ if (table == null || table.toString().isEmpty())
+ table = this.defaultTableName;
+
+ if (!simulate && table == null)
+ throw new IOException("No table or default table specified. Try simulation mode next time");
+
+ ++mutCount;
+ valCount += mutation.size();
+ printMutation(table, mutation);
+
+ if (simulate)
+ return;
+
+ if (!bws.containsKey(table))
+ try {
+ addTable(table);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IOException(e);
+ }
+
+ try {
+ bws.get(table).addMutation(mutation);
+ } catch (MutationsRejectedException e) {
+ throw new IOException(e);
+ }
+ }
+
+ public void addTable(Text tableName) throws AccumuloException, AccumuloSecurityException {
+ if (simulate) {
+ log.info("Simulating adding table: " + tableName);
+ return;
+ }
+
+ log.debug("Adding table: " + tableName);
+ BatchWriter bw = null;
+ String table = tableName.toString();
+
+ if (createTables && !conn.tableOperations().exists(table)) {
+ try {
+ conn.tableOperations().create(table);
+ } catch (AccumuloSecurityException e) {
+ log.error("Accumulo security violation creating " + table, e);
+ throw e;
+ } catch (TableExistsException e) {
+ // Shouldn't happen
+ }
+ }
+
+ try {
+ bw = mtbw.getBatchWriter(table);
+ } catch (TableNotFoundException e) {
+ log.error("Accumulo table " + table + " doesn't exist and cannot be created.", e);
+ throw new AccumuloException(e);
+ } catch (AccumuloException e) {
+ throw e;
+ } catch (AccumuloSecurityException e) {
+ throw e;
+ }
+
+ if (bw != null)
+ bws.put(tableName, bw);
+ }
+
+ private int printMutation(Text table, Mutation m) {
+ if (log.isTraceEnabled()) {
+ log.trace(String.format("Table %s row key: %s", table, hexDump(m.getRow())));
+ for (ColumnUpdate cu : m.getUpdates()) {
+ log.trace(String.format("Table %s column: %s:%s", table, hexDump(cu.getColumnFamily()), hexDump(cu.getColumnQualifier())));
+ log.trace(String.format("Table %s security: %s", table, new ColumnVisibility(cu.getColumnVisibility()).toString()));
+ log.trace(String.format("Table %s value: %s", table, hexDump(cu.getValue())));
+ }
+ }
+ return m.getUpdates().size();
+ }
+
+ private String hexDump(byte[] ba) {
+ StringBuilder sb = new StringBuilder();
+ for (byte b : ba) {
+ if ((b > 0x20) && (b < 0x7e))
+ sb.append((char) b);
+ else
+ sb.append(String.format("x%02x", b));
+ }
+ return sb.toString();
+ }
+
+ @Override
+ public void close(TaskAttemptContext attempt) throws IOException, InterruptedException {
+ log.debug("mutations written: " + mutCount + ", values written: " + valCount);
+ if (simulate)
+ return;
+
+ try {
+ mtbw.close();
+ } catch (MutationsRejectedException e) {
+ if (e.getAuthorizationFailuresMap().size() > 0) {
+ HashMap<String,Set<SecurityErrorCode>> tables = new HashMap<String,Set<SecurityErrorCode>>();
+ for (Entry<KeyExtent,Set<SecurityErrorCode>> ke : e.getAuthorizationFailuresMap().entrySet()) {
+ Set<SecurityErrorCode> secCodes = tables.get(ke.getKey().getTableId().toString());
+ if (secCodes == null) {
+ secCodes = new HashSet<SecurityErrorCode>();
+ tables.put(ke.getKey().getTableId().toString(), secCodes);
+ }
+ secCodes.addAll(ke.getValue());
+ }
+
+ log.error("Not authorized to write to tables : " + tables);
+ }
+
+ if (e.getConstraintViolationSummaries().size() > 0) {
+ log.error("Constraint violations : " + e.getConstraintViolationSummaries().size());
+ }
+ }
+ }
+ }
+
+ @Override
+ public void checkOutputSpecs(JobContext job) throws IOException {
+ if (!isConnectorInfoSet(job))
+ throw new IOException("Connector info has not been set.");
+ try {
+ // if the instance isn't configured, it will complain here
+ String principal = getPrincipal(job);
+ AuthenticationToken token = getAuthenticationToken(job);
+ Connector c = getInstance(job).getConnector(principal, token);
+ if (!c.securityOperations().authenticateUser(principal, token))
+ throw new IOException("Unable to authenticate user");
+ } catch (AccumuloException e) {
+ throw new IOException(e);
+ } catch (AccumuloSecurityException e) {
+ throw new IOException(e);
+ }
+ }
+
+ @Override
+ public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
+ return new NullOutputFormat<Text,Mutation>().getOutputCommitter(context);
+ }
+
+ @Override
+ public RecordWriter<Text,Mutation> getRecordWriter(TaskAttemptContext attempt) throws IOException {
+ try {
+ return new AccumuloRecordWriter(attempt);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ }
+
+}
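For reference, a minimal sketch of wiring these output-side configurators into a job. The instance name, ZooKeeper hosts, principal, password, and table name below are placeholders, and the sketch assumes a Hadoop 2 style Job.getInstance(); it is an illustration rather than code from this commit.

import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Mutation;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class OutputFormatSetup {
  public static Job configure() throws Exception {
    Job job = Job.getInstance(new Configuration(), "write-to-accumulo");
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Text.class);        // table name; null or empty falls back to the default table
    job.setOutputValueClass(Mutation.class);  // the mutation to apply to that table

    AccumuloOutputFormat.setConnectorInfo(job, "writer", new PasswordToken("secret"));
    AccumuloOutputFormat.setZooKeeperInstance(job,
        ClientConfiguration.loadDefault().withInstance("myInstance").withZkHosts("zk1:2181,zk2:2181"));
    AccumuloOutputFormat.setDefaultTableName(job, "output_table");
    AccumuloOutputFormat.setCreateTables(job, true); // principal needs Table.CREATE permission
    AccumuloOutputFormat.setBatchWriterOptions(job, new BatchWriterConfig().setMaxMemory(10 * 1024 * 1024L));
    return job;
  }
}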
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormat.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormat.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormat.java
new file mode 100644
index 0000000..37caf15
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormat.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.IOException;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.RowIterator;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.PeekingIterator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * This class allows MapReduce jobs to use Accumulo as the source of data. This {@link InputFormat} provides row names as {@link Text} as keys, and a
+ * corresponding {@link PeekingIterator} as a value, which in turn makes the {@link Key}/{@link Value} pairs for that row available to the Map function.
+ *
+ * The user must specify the following via static configurator methods:
+ *
+ * <ul>
+ * <li>{@link AccumuloRowInputFormat#setConnectorInfo(Job, String, AuthenticationToken)}
+ * <li>{@link AccumuloRowInputFormat#setInputTableName(Job, String)}
+ * <li>{@link AccumuloRowInputFormat#setScanAuthorizations(Job, Authorizations)}
+ * <li>{@link AccumuloRowInputFormat#setZooKeeperInstance(Job, ClientConfiguration)} OR {@link AccumuloRowInputFormat#setMockInstance(Job, String)}
+ * </ul>
+ *
+ * Other static methods are optional.
+ */
+public class AccumuloRowInputFormat extends InputFormatBase<Text,PeekingIterator<Entry<Key,Value>>> {
+ @Override
+ public RecordReader<Text,PeekingIterator<Entry<Key,Value>>> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ log.setLevel(getLogLevel(context));
+ return new RecordReaderBase<Text,PeekingIterator<Entry<Key,Value>>>() {
+ RowIterator rowIterator;
+
+ @Override
+ public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
+ super.initialize(inSplit, attempt);
+ rowIterator = new RowIterator(scannerIterator);
+ currentK = new Text();
+ currentV = null;
+ }
+
+ @Override
+ public boolean nextKeyValue() throws IOException, InterruptedException {
+ if (!rowIterator.hasNext())
+ return false;
+ currentV = new PeekingIterator<Entry<Key,Value>>(rowIterator.next());
+ numKeysRead = rowIterator.getKVCount();
+ currentKey = currentV.peek().getKey();
+ currentK = new Text(currentKey.getRow());
+ return true;
+ }
+ };
+ }
+}
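A short usage sketch for the row-oriented input format, following the four configurators the class javadoc marks as required. The mapper below simply counts the cells in each row; the principal, password, instance, ZooKeeper hosts, and table name are placeholder values, not part of this patch.

import java.io.IOException;
import java.util.Map.Entry;

import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapreduce.AccumuloRowInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.util.PeekingIterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

public class RowSizeJob {
  // Each map() call receives one whole row: the row id as the key and an iterator over its cells.
  public static class RowSizeMapper extends Mapper<Text,PeekingIterator<Entry<Key,Value>>,Text,LongWritable> {
    @Override
    protected void map(Text row, PeekingIterator<Entry<Key,Value>> cells, Context context)
        throws IOException, InterruptedException {
      long count = 0;
      while (cells.hasNext()) {
        cells.next();
        count++;
      }
      context.write(row, new LongWritable(count));
    }
  }

  public static Job configure() throws Exception {
    Job job = Job.getInstance();
    job.setInputFormatClass(AccumuloRowInputFormat.class);
    job.setMapperClass(RowSizeMapper.class);

    // The configurators the class javadoc lists as required.
    AccumuloRowInputFormat.setConnectorInfo(job, "reader", new PasswordToken("secret"));
    AccumuloRowInputFormat.setInputTableName(job, "input_table");
    AccumuloRowInputFormat.setScanAuthorizations(job, new Authorizations());
    AccumuloRowInputFormat.setZooKeeperInstance(job,
        ClientConfiguration.loadDefault().withInstance("myInstance").withZkHosts("zk1:2181"));
    return job;
  }
}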
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
new file mode 100644
index 0000000..e58e350
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
@@ -0,0 +1,384 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.ClientSideIteratorScanner;
+import org.apache.accumulo.core.client.IsolatedScanner;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.impl.TabletLocator;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * This abstract {@link InputFormat} class allows MapReduce jobs to use Accumulo as the source of K,V pairs.
+ * <p>
+ * Subclasses must implement a {@link #createRecordReader(InputSplit, TaskAttemptContext)} to provide a {@link RecordReader} for K,V.
+ * <p>
+ * A static base class, RecordReaderBase, is provided to retrieve Accumulo {@link Key}/{@link Value} pairs, but one must implement its
+ * {@link RecordReaderBase#nextKeyValue()} to transform them to the desired generic types K,V.
+ * <p>
+ * See {@link AccumuloInputFormat} for an example implementation.
+ */
+public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
+
+ /**
+ * Gets the table name from the configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the table name
+ * @since 1.5.0
+ * @see #setInputTableName(Job, String)
+ */
+ protected static String getInputTableName(JobContext context) {
+ return InputConfigurator.getInputTableName(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Sets the name of the input table, over which this job will scan.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param tableName
+ * the name of the table over which this job will scan
+ * @since 1.5.0
+ */
+ public static void setInputTableName(Job job, String tableName) {
+ InputConfigurator.setInputTableName(CLASS, job.getConfiguration(), tableName);
+ }
+
+ /**
+ * Sets the input ranges to scan for the single input table associated with this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param ranges
+ * the ranges that will be mapped over
+ * @since 1.5.0
+ */
+ public static void setRanges(Job job, Collection<Range> ranges) {
+ InputConfigurator.setRanges(CLASS, job.getConfiguration(), ranges);
+ }
+
+ /**
+ * Gets the ranges to scan over from a job.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return the ranges
+ * @since 1.5.0
+ * @see #setRanges(Job, Collection)
+ */
+ protected static List<Range> getRanges(JobContext context) throws IOException {
+ return InputConfigurator.getRanges(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Restricts the columns that will be mapped over for this job for the default input table.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param columnFamilyColumnQualifierPairs
+ * pairs of {@link Text} objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is
+ * selected. An empty set is the default and is equivalent to scanning all columns.
+ * @since 1.5.0
+ */
+ public static void fetchColumns(Job job, Collection<Pair<Text,Text>> columnFamilyColumnQualifierPairs) {
+ InputConfigurator.fetchColumns(CLASS, job.getConfiguration(), columnFamilyColumnQualifierPairs);
+ }
+
+ /**
+ * Gets the columns to be mapped over from this job.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return a set of columns
+ * @since 1.5.0
+ * @see #fetchColumns(Job, Collection)
+ */
+ protected static Set<Pair<Text,Text>> getFetchedColumns(JobContext context) {
+ return InputConfigurator.getFetchedColumns(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Encode an iterator on the single input table for this job.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param cfg
+ * the configuration of the iterator
+ * @since 1.5.0
+ */
+ public static void addIterator(Job job, IteratorSetting cfg) {
+ InputConfigurator.addIterator(CLASS, job.getConfiguration(), cfg);
+ }
+
+ /**
+ * Gets a list of the iterator settings (for iterators to apply to a scanner) from this configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return a list of iterators
+ * @since 1.5.0
+ * @see #addIterator(Job, IteratorSetting)
+ */
+ protected static List<IteratorSetting> getIterators(JobContext context) {
+ return InputConfigurator.getIterators(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
+ * Disabling this feature will cause exactly one Map task to be created for each specified range.
+ *
+ * <p>
+ * By default, this feature is <b>enabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @see #setRanges(Job, Collection)
+ * @since 1.5.0
+ */
+ public static void setAutoAdjustRanges(Job job, boolean enableFeature) {
+ InputConfigurator.setAutoAdjustRanges(CLASS, job.getConfiguration(), enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration has auto-adjust ranges enabled.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return false if the feature is disabled, true otherwise
+ * @since 1.5.0
+ * @see #setAutoAdjustRanges(Job, boolean)
+ */
+ protected static boolean getAutoAdjustRanges(JobContext context) {
+ return InputConfigurator.getAutoAdjustRanges(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Controls the use of the {@link IsolatedScanner} in this job.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.5.0
+ */
+ public static void setScanIsolation(Job job, boolean enableFeature) {
+ InputConfigurator.setScanIsolation(CLASS, job.getConfiguration(), enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration has isolation enabled.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.5.0
+ * @see #setScanIsolation(Job, boolean)
+ */
+ protected static boolean isIsolated(JobContext context) {
+ return InputConfigurator.isIsolated(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Controls the use of the {@link ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack to be constructed within the Map
+ * task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be available on the classpath for the task.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.5.0
+ */
+ public static void setLocalIterators(Job job, boolean enableFeature) {
+ InputConfigurator.setLocalIterators(CLASS, job.getConfiguration(), enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration uses local iterators.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.5.0
+ * @see #setLocalIterators(Job, boolean)
+ */
+ protected static boolean usesLocalIterators(JobContext context) {
+ return InputConfigurator.usesLocalIterators(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * <p>
+ * Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
+ * table's files. If the table is not offline, then the job will fail. If the table comes online during the map reduce job, it is likely that the job will
+ * fail.
+ *
+ * <p>
+ * To use this option, the map reduce user will need access to read the Accumulo directory in HDFS.
+ *
+ * <p>
+ * Reading the offline table will create the scan time iterator stack in the map process. So any iterators that are configured for the table will need to be
+ * on the mapper's classpath.
+ *
+ * <p>
+ * One way to use this feature is to clone a table, take the clone offline, and use the clone as the input table for a map reduce job. If you plan to map
+ * reduce over the data many times, it may be better to compact the table, clone it, take it offline, and use the clone for all map reduce jobs. The
+ * reason to do this is that compaction will reduce each tablet in the table to one file, and it is faster to read from one file.
+ *
+ * <p>
+ * There are two possible advantages to reading a table's files directly out of HDFS. First, you may see better read performance. Second, it will support
+ * speculative execution better. When reading an online table, speculative execution can put more load on an already slow tablet server.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param job
+ * the Hadoop job instance to be configured
+ * @param enableFeature
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.5.0
+ */
+ public static void setOfflineTableScan(Job job, boolean enableFeature) {
+ InputConfigurator.setOfflineTableScan(CLASS, job.getConfiguration(), enableFeature);
+ }
+
+ /**
+ * Determines whether a configuration has the offline table scan feature enabled.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.5.0
+ * @see #setOfflineTableScan(Job, boolean)
+ */
+ protected static boolean isOfflineScan(JobContext context) {
+ return InputConfigurator.isOfflineScan(CLASS, getConfiguration(context));
+ }
+
+ /**
+ * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @return an Accumulo tablet locator
+ * @throws org.apache.accumulo.core.client.TableNotFoundException
+ * if the table name set on the configuration doesn't exist
+ * @since 1.5.0
+ * @deprecated since 1.6.0
+ */
+ @Deprecated
+ protected static TabletLocator getTabletLocator(JobContext context) throws TableNotFoundException {
+ return InputConfigurator.getTabletLocator(CLASS, getConfiguration(context), InputConfigurator.getInputTableName(CLASS, getConfiguration(context)));
+ }
+
+ protected abstract static class RecordReaderBase<K,V> extends AbstractRecordReader<K,V> {
+
+ /**
+ * Apply the configured iterators from the configuration to the scanner for the specified table name
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @param scanner
+ * the scanner to configure
+ * @since 1.6.0
+ */
+ @Override
+ protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName, org.apache.accumulo.core.client.mapreduce.RangeInputSplit split) {
+ setupIterators(context, scanner, split);
+ }
+
+ /**
+ * Apply the configured iterators from the configuration to the scanner.
+ *
+ * @param context
+ * the Hadoop context for the configured job
+ * @param scanner
+ * the scanner to configure
+ */
+ @Deprecated
+ protected void setupIterators(TaskAttemptContext context, Scanner scanner) {
+ setupIterators(context, scanner, null);
+ }
+
+ /**
+ * Initialize a scanner over the given input split using this task attempt configuration.
+ */
+ protected void setupIterators(TaskAttemptContext context, Scanner scanner, org.apache.accumulo.core.client.mapreduce.RangeInputSplit split) {
+ List<IteratorSetting> iterators = null;
+ if (null == split) {
+ iterators = getIterators(context);
+ } else {
+ iterators = split.getIterators();
+ if (null == iterators) {
+ iterators = getIterators(context);
+ }
+ }
+ for (IteratorSetting iterator : iterators)
+ scanner.addScanIterator(iterator);
+ }
+ }
+
+ /**
+ * @deprecated since 1.5.2; Use {@link org.apache.accumulo.core.client.mapreduce.RangeInputSplit} instead.
+ * @see org.apache.accumulo.core.client.mapreduce.RangeInputSplit
+ */
+ @Deprecated
+ public static class RangeInputSplit extends org.apache.accumulo.core.client.mapreduce.RangeInputSplit {
+
+ public RangeInputSplit() {
+ super();
+ }
+
+ public RangeInputSplit(RangeInputSplit other) throws IOException {
+ super(other);
+ }
+
+ protected RangeInputSplit(String table, Range range, String[] locations) {
+ super(table, "", range, locations);
+ }
+
+ public RangeInputSplit(String table, String tableId, Range range, String[] locations) {
+ super(table, tableId, range, locations);
+ }
+ }
+}
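The optional knobs above compose the scan that each map task performs. A hedged sketch of how they might be used together, assuming the concrete AccumuloInputFormat subclass referenced in the javadoc; the range, column family, and regex below are arbitrary example values.

import java.util.Collections;

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.util.Pair;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class ScanTuning {
  public static void tune(Job job) {
    // Limit the scan to one row range and a single column family.
    AccumuloInputFormat.setRanges(job, Collections.singleton(new Range("a", "m")));
    AccumuloInputFormat.fetchColumns(job,
        Collections.singleton(new Pair<Text,Text>(new Text("meta"), null)));

    // Filter rows with a regex iterator; it runs on the tablet server unless local iterators are enabled.
    IteratorSetting regex = new IteratorSetting(50, "rowFilter", RegExFilter.class);
    RegExFilter.setRegexs(regex, "a.*", null, null, null, false);
    AccumuloInputFormat.addIterator(job, regex);

    // One map task per supplied range instead of one per overlapping tablet.
    AccumuloInputFormat.setAutoAdjustRanges(job, false);
    AccumuloInputFormat.setScanIsolation(job, true);
  }
}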
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
new file mode 100644
index 0000000..e59451e
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * This class holds a batch scan configuration for a table. It contains all the properties needed to specify how rows should be returned from the table.
+ */
+public class InputTableConfig implements Writable {
+
+ private List<IteratorSetting> iterators;
+ private List<Range> ranges;
+ private Collection<Pair<Text,Text>> columns;
+
+ private boolean autoAdjustRanges = true;
+ private boolean useLocalIterators = false;
+ private boolean useIsolatedScanners = false;
+ private boolean offlineScan = false;
+
+ public InputTableConfig() {}
+
+ /**
+ * Creates a batch scan config object out of a previously serialized batch scan config object.
+ *
+ * @param input
+ * the data input of the serialized batch scan config
+ */
+ public InputTableConfig(DataInput input) throws IOException {
+ readFields(input);
+ }
+
+ /**
+ * Sets the input ranges to scan for the table associated with this configuration.
+ *
+ * @param ranges
+ * the ranges that will be mapped over
+ * @since 1.6.0
+ */
+ public InputTableConfig setRanges(List<Range> ranges) {
+ this.ranges = ranges;
+ return this;
+ }
+
+ /**
+ * Returns the ranges to be queried in the configuration
+ */
+ public List<Range> getRanges() {
+ return ranges != null ? ranges : new ArrayList<Range>();
+ }
+
+ /**
+ * Restricts the columns that will be mapped over for the table associated with this configuration.
+ *
+ * @param columns
+ * pairs of {@link Text} objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is
+ * selected. An empty set is the default and is equivalent to scanning all columns.
+ * @since 1.6.0
+ */
+ public InputTableConfig fetchColumns(Collection<Pair<Text,Text>> columns) {
+ this.columns = columns;
+ return this;
+ }
+
+ /**
+ * Returns the columns to be fetched for this configuration
+ */
+ public Collection<Pair<Text,Text>> getFetchedColumns() {
+ return columns != null ? columns : new HashSet<Pair<Text,Text>>();
+ }
+
+ /**
+ * Sets the iterators to be used in the query.
+ *
+ * @param iterators
+ * the configurations for the iterators
+ * @since 1.6.0
+ */
+ public InputTableConfig setIterators(List<IteratorSetting> iterators) {
+ this.iterators = iterators;
+ return this;
+ }
+
+ /**
+ * Returns the iterators to be set on this configuration
+ */
+ public List<IteratorSetting> getIterators() {
+ return iterators != null ? iterators : new ArrayList<IteratorSetting>();
+ }
+
+ /**
+ * Controls the automatic adjustment of ranges for this job. This feature merges overlapping ranges, then splits them to align with tablet boundaries.
+ * Disabling this feature will cause exactly one Map task to be created for each specified range.
+ *
+ * <p>
+ * By default, this feature is <b>enabled</b>.
+ *
+ * @param autoAdjustRanges
+ * the feature is enabled if true, disabled otherwise
+ * @see #setRanges(java.util.List)
+ * @since 1.6.0
+ */
+ public InputTableConfig setAutoAdjustRanges(boolean autoAdjustRanges) {
+ this.autoAdjustRanges = autoAdjustRanges;
+ return this;
+ }
+
+ /**
+ * Determines whether a configuration has auto-adjust ranges enabled.
+ *
+ * @return false if the feature is disabled, true otherwise
+ * @since 1.6.0
+ * @see #setAutoAdjustRanges(boolean)
+ */
+ public boolean shouldAutoAdjustRanges() {
+ return autoAdjustRanges;
+ }
+
+ /**
+ * Controls the use of the {@link org.apache.accumulo.core.client.ClientSideIteratorScanner} in this job. Enabling this feature will cause the iterator stack
+ * to be constructed within the Map task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be
+ * available on the classpath for the task.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param useLocalIterators
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.6.0
+ */
+ public InputTableConfig setUseLocalIterators(boolean useLocalIterators) {
+ this.useLocalIterators = useLocalIterators;
+ return this;
+ }
+
+ /**
+ * Determines whether a configuration uses local iterators.
+ *
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.6.0
+ * @see #setUseLocalIterators(boolean)
+ */
+ public boolean shouldUseLocalIterators() {
+ return useLocalIterators;
+ }
+
+ /**
+ * <p>
+ * Enable reading offline tables. By default, this feature is disabled and only online tables are scanned. This will make the map reduce job directly read the
+ * table's files. If the table is not offline, then the job will fail. If the table comes online during the map reduce job, it is likely that the job will
+ * fail.
+ *
+ * <p>
+ * To use this option, the map reduce user will need access to read the Accumulo directory in HDFS.
+ *
+ * <p>
+ * Reading the offline table will create the scan time iterator stack in the map process. So any iterators that are configured for the table will need to be
+ * on the mapper's classpath. The accumulo-site.xml may need to be on the mapper's classpath if HDFS or the Accumulo directory in HDFS is non-standard.
+ *
+ * <p>
+ * One way to use this feature is to clone a table, take the clone offline, and use the clone as the input table for a map reduce job. If you plan to map
+ * reduce over the data many times, it may be better to compact the table, clone it, take it offline, and use the clone for all map reduce jobs. The
+ * reason to do this is that compaction will reduce each tablet in the table to one file, and it is faster to read from one file.
+ *
+ * <p>
+ * There are two possible advantages to reading a table's files directly out of HDFS. First, you may see better read performance. Second, it will support
+ * speculative execution better. When reading an online table, speculative execution can put more load on an already slow tablet server.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param offlineScan
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.6.0
+ */
+ public InputTableConfig setOfflineScan(boolean offlineScan) {
+ this.offlineScan = offlineScan;
+ return this;
+ }
+
+ /**
+ * Determines whether a configuration has the offline table scan feature enabled.
+ *
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.6.0
+ * @see #setOfflineScan(boolean)
+ */
+ public boolean isOfflineScan() {
+ return offlineScan;
+ }
+
+ /**
+ * Controls the use of the {@link org.apache.accumulo.core.client.IsolatedScanner} in this job.
+ *
+ * <p>
+ * By default, this feature is <b>disabled</b>.
+ *
+ * @param useIsolatedScanners
+ * the feature is enabled if true, disabled otherwise
+ * @since 1.6.0
+ */
+ public InputTableConfig setUseIsolatedScanners(boolean useIsolatedScanners) {
+ this.useIsolatedScanners = useIsolatedScanners;
+ return this;
+ }
+
+ /**
+ * Determines whether a configuration has isolation enabled.
+ *
+ * @return true if the feature is enabled, false otherwise
+ * @since 1.6.0
+ * @see #setUseIsolatedScanners(boolean)
+ */
+ public boolean shouldUseIsolatedScanners() {
+ return useIsolatedScanners;
+ }
+
+ /**
+ * Writes the state for the current object out to the specified {@link DataOutput}
+ *
+ * @param dataOutput
+ * the output for which to write the object's state
+ */
+ @Override
+ public void write(DataOutput dataOutput) throws IOException {
+ if (iterators != null) {
+ dataOutput.writeInt(iterators.size());
+ for (IteratorSetting setting : iterators)
+ setting.write(dataOutput);
+ } else {
+ dataOutput.writeInt(0);
+ }
+ if (ranges != null) {
+ dataOutput.writeInt(ranges.size());
+ for (Range range : ranges)
+ range.write(dataOutput);
+ } else {
+ dataOutput.writeInt(0);
+ }
+ if (columns != null) {
+ dataOutput.writeInt(columns.size());
+ for (Pair<Text,Text> column : columns) {
+ if (column.getSecond() == null) {
+ dataOutput.writeInt(1);
+ column.getFirst().write(dataOutput);
+ } else {
+ dataOutput.writeInt(2);
+ column.getFirst().write(dataOutput);
+ column.getSecond().write(dataOutput);
+ }
+ }
+ } else {
+ dataOutput.writeInt(0);
+ }
+ dataOutput.writeBoolean(autoAdjustRanges);
+ dataOutput.writeBoolean(useLocalIterators);
+ dataOutput.writeBoolean(useIsolatedScanners);
+ }
+
+ /**
+ * Reads the fields in the {@link DataInput} into the current object
+ *
+ * @param dataInput
+ * the input fields to read into the current object
+ */
+ @Override
+ public void readFields(DataInput dataInput) throws IOException {
+ // load iterators
+ long iterSize = dataInput.readInt();
+ if (iterSize > 0)
+ iterators = new ArrayList<IteratorSetting>();
+ for (int i = 0; i < iterSize; i++)
+ iterators.add(new IteratorSetting(dataInput));
+ // load ranges
+ long rangeSize = dataInput.readInt();
+ if (rangeSize > 0)
+ ranges = new ArrayList<Range>();
+ for (int i = 0; i < rangeSize; i++) {
+ Range range = new Range();
+ range.readFields(dataInput);
+ ranges.add(range);
+ }
+ // load columns
+ long columnSize = dataInput.readInt();
+ if (columnSize > 0)
+ columns = new HashSet<Pair<Text,Text>>();
+ for (int i = 0; i < columnSize; i++) {
+ long numPairs = dataInput.readInt();
+ Text colFam = new Text();
+ colFam.readFields(dataInput);
+ if (numPairs == 1) {
+ columns.add(new Pair<Text,Text>(colFam, null));
+ } else if (numPairs == 2) {
+ Text colQual = new Text();
+ colQual.readFields(dataInput);
+ columns.add(new Pair<Text,Text>(colFam, colQual));
+ }
+ }
+ autoAdjustRanges = dataInput.readBoolean();
+ useLocalIterators = dataInput.readBoolean();
+ useIsolatedScanners = dataInput.readBoolean();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o)
+ return true;
+ if (o == null || getClass() != o.getClass())
+ return false;
+
+ InputTableConfig that = (InputTableConfig) o;
+
+ if (autoAdjustRanges != that.autoAdjustRanges)
+ return false;
+ if (offlineScan != that.offlineScan)
+ return false;
+ if (useIsolatedScanners != that.useIsolatedScanners)
+ return false;
+ if (useLocalIterators != that.useLocalIterators)
+ return false;
+ if (columns != null ? !columns.equals(that.columns) : that.columns != null)
+ return false;
+ if (iterators != null ? !iterators.equals(that.iterators) : that.iterators != null)
+ return false;
+ if (ranges != null ? !ranges.equals(that.ranges) : that.ranges != null)
+ return false;
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = 31 * (iterators != null ? iterators.hashCode() : 0);
+ result = 31 * result + (ranges != null ? ranges.hashCode() : 0);
+ result = 31 * result + (columns != null ? columns.hashCode() : 0);
+ result = 31 * result + (autoAdjustRanges ? 1 : 0);
+ result = 31 * result + (useLocalIterators ? 1 : 0);
+ result = 31 * result + (useIsolatedScanners ? 1 : 0);
+ result = 31 * result + (offlineScan ? 1 : 0);
+ return result;
+ }
+}
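InputTableConfig is a per-table bundle of the same scan options, and its Writable implementation lets it be serialized into a job configuration and rebuilt on the task side. A small sketch under made-up ranges, column family, and iterator settings:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.iterators.user.VersioningIterator;
import org.apache.accumulo.core.util.Pair;
import org.apache.hadoop.io.Text;

public class TableConfigDemo {
  public static void main(String[] args) throws IOException {
    // The setters return the config itself, so the options chain fluently.
    InputTableConfig config = new InputTableConfig()
        .setRanges(Arrays.asList(new Range("2014"), new Range("2015")))
        .fetchColumns(Collections.singleton(new Pair<Text,Text>(new Text("events"), null)))
        .setIterators(Collections.singletonList(new IteratorSetting(30, "vers", VersioningIterator.class)))
        .setAutoAdjustRanges(true)
        .setUseIsolatedScanners(false);

    // Serialize through the Writable interface and rebuild via the DataInput constructor.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    config.write(new DataOutputStream(bytes));
    InputTableConfig copy = new InputTableConfig(
        new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(copy.getRanges()); // the two ranges survive the round trip
  }
}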
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
new file mode 100644
index 0000000..4b5a149
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
@@ -0,0 +1,490 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.TokenSource;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.PartialKey;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.log4j.Level;
+
+/**
+ * Encapsulates an Accumulo {@link Range} for use in MapReduce jobs.
+ */
+public class RangeInputSplit extends InputSplit implements Writable {
+ private Range range;
+ private String[] locations;
+ private String tableId, tableName, instanceName, zooKeepers, principal;
+ private TokenSource tokenSource;
+ private String tokenFile;
+ private AuthenticationToken token;
+ private Boolean offline, mockInstance, isolatedScan, localIterators;
+ private Authorizations auths;
+ private Set<Pair<Text,Text>> fetchedColumns;
+ private List<IteratorSetting> iterators;
+ private Level level;
+
+ public RangeInputSplit() {
+ range = new Range();
+ locations = new String[0];
+ tableName = "";
+ tableId = "";
+ }
+
+ public RangeInputSplit(RangeInputSplit split) throws IOException {
+ this.setRange(split.getRange());
+ this.setLocations(split.getLocations());
+ this.setTableName(split.getTableName());
+ this.setTableId(split.getTableId());
+ }
+
+ protected RangeInputSplit(String table, String tableId, Range range, String[] locations) {
+ this.range = range;
+ setLocations(locations);
+ this.tableName = table;
+ this.tableId = tableId;
+ }
+
+ public Range getRange() {
+ return range;
+ }
+
+ private static byte[] extractBytes(ByteSequence seq, int numBytes) {
+ byte[] bytes = new byte[numBytes + 1];
+ bytes[0] = 0;
+ for (int i = 0; i < numBytes; i++) {
+ if (i >= seq.length())
+ bytes[i + 1] = 0;
+ else
+ bytes[i + 1] = seq.byteAt(i);
+ }
+ return bytes;
+ }
+
+ public static float getProgress(ByteSequence start, ByteSequence end, ByteSequence position) {
+ int maxDepth = Math.min(Math.max(end.length(), start.length()), position.length());
+ BigInteger startBI = new BigInteger(extractBytes(start, maxDepth));
+ BigInteger endBI = new BigInteger(extractBytes(end, maxDepth));
+ BigInteger positionBI = new BigInteger(extractBytes(position, maxDepth));
+ return (float) (positionBI.subtract(startBI).doubleValue() / endBI.subtract(startBI).doubleValue());
+ }
+
+ public float getProgress(Key currentKey) {
+ if (currentKey == null)
+ return 0f;
+ if (range.getStartKey() != null && range.getEndKey() != null) {
+ if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW) != 0) {
+ // just look at the row progress
+ return getProgress(range.getStartKey().getRowData(), range.getEndKey().getRowData(), currentKey.getRowData());
+ } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM) != 0) {
+ // just look at the column family progress
+ return getProgress(range.getStartKey().getColumnFamilyData(), range.getEndKey().getColumnFamilyData(), currentKey.getColumnFamilyData());
+ } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM_COLQUAL) != 0) {
+ // just look at the column qualifier progress
+ return getProgress(range.getStartKey().getColumnQualifierData(), range.getEndKey().getColumnQualifierData(), currentKey.getColumnQualifierData());
+ }
+ }
+ // if we can't figure it out, then claim no progress
+ return 0f;
+ }
+
+ /**
+ * This implementation of length is only an estimate; it does not provide exact values. Do not have your code rely on this return value.
+ */
+ @Override
+ public long getLength() throws IOException {
+ Text startRow = range.isInfiniteStartKey() ? new Text(new byte[] {Byte.MIN_VALUE}) : range.getStartKey().getRow();
+ Text stopRow = range.isInfiniteStopKey() ? new Text(new byte[] {Byte.MAX_VALUE}) : range.getEndKey().getRow();
+ int maxCommon = Math.min(7, Math.min(startRow.getLength(), stopRow.getLength()));
+ long diff = 0;
+
+ byte[] start = startRow.getBytes();
+ byte[] stop = stopRow.getBytes();
+ for (int i = 0; i < maxCommon; ++i) {
+ diff |= 0xff & (start[i] ^ stop[i]);
+ diff <<= Byte.SIZE;
+ }
+
+ if (startRow.getLength() != stopRow.getLength())
+ diff |= 0xff;
+
+ return diff + 1;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException {
+ return Arrays.copyOf(locations, locations.length);
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ range.readFields(in);
+ tableName = in.readUTF();
+ tableId = in.readUTF();
+ int numLocs = in.readInt();
+ locations = new String[numLocs];
+ for (int i = 0; i < numLocs; ++i)
+ locations[i] = in.readUTF();
+
+ if (in.readBoolean()) {
+ isolatedScan = in.readBoolean();
+ }
+
+ if (in.readBoolean()) {
+ offline = in.readBoolean();
+ }
+
+ if (in.readBoolean()) {
+ localIterators = in.readBoolean();
+ }
+
+ if (in.readBoolean()) {
+ mockInstance = in.readBoolean();
+ }
+
+ if (in.readBoolean()) {
+ int numColumns = in.readInt();
+ List<String> columns = new ArrayList<String>(numColumns);
+ for (int i = 0; i < numColumns; i++) {
+ columns.add(in.readUTF());
+ }
+
+ fetchedColumns = InputConfigurator.deserializeFetchedColumns(columns);
+ }
+
+ if (in.readBoolean()) {
+ String strAuths = in.readUTF();
+ auths = new Authorizations(strAuths.getBytes(StandardCharsets.UTF_8));
+ }
+
+ if (in.readBoolean()) {
+ principal = in.readUTF();
+ }
+
+ if (in.readBoolean()) {
+ int ordinal = in.readInt();
+ this.tokenSource = TokenSource.values()[ordinal];
+
+ switch (this.tokenSource) {
+ case INLINE:
+ String tokenClass = in.readUTF();
+ byte[] base64TokenBytes = in.readUTF().getBytes(StandardCharsets.UTF_8);
+ byte[] tokenBytes = Base64.decodeBase64(base64TokenBytes);
+
+ this.token = AuthenticationTokenSerializer.deserialize(tokenClass, tokenBytes);
+ break;
+
+ case FILE:
+ this.tokenFile = in.readUTF();
+
+ break;
+ default:
+ throw new IOException("Cannot parse unknown TokenSource ordinal");
+ }
+ }
+
+ if (in.readBoolean()) {
+ instanceName = in.readUTF();
+ }
+
+ if (in.readBoolean()) {
+ zooKeepers = in.readUTF();
+ }
+
+ if (in.readBoolean()) {
+ level = Level.toLevel(in.readInt());
+ }
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ range.write(out);
+ out.writeUTF(tableName);
+ out.writeUTF(tableId);
+ out.writeInt(locations.length);
+ for (int i = 0; i < locations.length; ++i)
+ out.writeUTF(locations[i]);
+
+ out.writeBoolean(null != isolatedScan);
+ if (null != isolatedScan) {
+ out.writeBoolean(isolatedScan);
+ }
+
+ out.writeBoolean(null != offline);
+ if (null != offline) {
+ out.writeBoolean(offline);
+ }
+
+ out.writeBoolean(null != localIterators);
+ if (null != localIterators) {
+ out.writeBoolean(localIterators);
+ }
+
+ out.writeBoolean(null != mockInstance);
+ if (null != mockInstance) {
+ out.writeBoolean(mockInstance);
+ }
+
+ out.writeBoolean(null != fetchedColumns);
+ if (null != fetchedColumns) {
+ String[] cols = InputConfigurator.serializeColumns(fetchedColumns);
+ out.writeInt(cols.length);
+ for (String col : cols) {
+ out.writeUTF(col);
+ }
+ }
+
+ out.writeBoolean(null != auths);
+ if (null != auths) {
+ out.writeUTF(auths.serialize());
+ }
+
+ out.writeBoolean(null != principal);
+ if (null != principal) {
+ out.writeUTF(principal);
+ }
+
+ out.writeBoolean(null != tokenSource);
+ if (null != tokenSource) {
+ out.writeInt(tokenSource.ordinal());
+
+ if (null != token && null != tokenFile) {
+ throw new IOException("Cannot use both inline AuthenticationToken and file-based AuthenticationToken");
+ } else if (null != token) {
+ out.writeUTF(token.getClass().getCanonicalName());
+ out.writeUTF(Base64.encodeBase64String(AuthenticationTokenSerializer.serialize(token)));
+ } else {
+ out.writeUTF(tokenFile);
+ }
+ }
+
+ out.writeBoolean(null != instanceName);
+ if (null != instanceName) {
+ out.writeUTF(instanceName);
+ }
+
+ out.writeBoolean(null != zooKeepers);
+ if (null != zooKeepers) {
+ out.writeUTF(zooKeepers);
+ }
+
+ out.writeBoolean(null != level);
+ if (null != level) {
+ out.writeInt(level.toInt());
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(256);
+ sb.append("Range: ").append(range);
+ sb.append(" Locations: ").append(Arrays.asList(locations));
+ sb.append(" Table: ").append(tableName);
+ sb.append(" TableID: ").append(tableId);
+ sb.append(" InstanceName: ").append(instanceName);
+ sb.append(" zooKeepers: ").append(zooKeepers);
+ sb.append(" principal: ").append(principal);
+ sb.append(" tokenSource: ").append(tokenSource);
+ sb.append(" authenticationToken: ").append(token);
+ sb.append(" authenticationTokenFile: ").append(tokenFile);
+ sb.append(" Authorizations: ").append(auths);
+ sb.append(" offlineScan: ").append(offline);
+ sb.append(" mockInstance: ").append(mockInstance);
+ sb.append(" isolatedScan: ").append(isolatedScan);
+ sb.append(" localIterators: ").append(localIterators);
+ sb.append(" fetchColumns: ").append(fetchedColumns);
+ sb.append(" iterators: ").append(iterators);
+ sb.append(" logLevel: ").append(level);
+ return sb.toString();
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public void setTableName(String table) {
+ this.tableName = table;
+ }
+
+ public void setTableId(String tableId) {
+ this.tableId = tableId;
+ }
+
+ public String getTableId() {
+ return tableId;
+ }
+
+ public Instance getInstance() {
+ if (null == instanceName) {
+ return null;
+ }
+
+ if (isMockInstance()) {
+ return new MockInstance(getInstanceName());
+ }
+
+ if (null == zooKeepers) {
+ return null;
+ }
+
+ return new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(getInstanceName()).withZkHosts(getZooKeepers()));
+ }
+
+ public String getInstanceName() {
+ return instanceName;
+ }
+
+ public void setInstanceName(String instanceName) {
+ this.instanceName = instanceName;
+ }
+
+ public String getZooKeepers() {
+ return zooKeepers;
+ }
+
+ public void setZooKeepers(String zooKeepers) {
+ this.zooKeepers = zooKeepers;
+ }
+
+ public String getPrincipal() {
+ return principal;
+ }
+
+ public void setPrincipal(String principal) {
+ this.principal = principal;
+ }
+
+ public AuthenticationToken getToken() {
+ return token;
+ }
+
+ public void setToken(AuthenticationToken token) {
+ this.tokenSource = TokenSource.INLINE;
+ this.token = token;
+ }
+
+ public void setToken(String tokenFile) {
+ this.tokenSource = TokenSource.FILE;
+ this.tokenFile = tokenFile;
+ }
+
+ public Boolean isOffline() {
+ return offline;
+ }
+
+ public void setOffline(Boolean offline) {
+ this.offline = offline;
+ }
+
+ public void setLocations(String[] locations) {
+ this.locations = Arrays.copyOf(locations, locations.length);
+ }
+
+ public Boolean isMockInstance() {
+ return mockInstance;
+ }
+
+ public void setMockInstance(Boolean mockInstance) {
+ this.mockInstance = mockInstance;
+ }
+
+ public Boolean isIsolatedScan() {
+ return isolatedScan;
+ }
+
+ public void setIsolatedScan(Boolean isolatedScan) {
+ this.isolatedScan = isolatedScan;
+ }
+
+ public Authorizations getAuths() {
+ return auths;
+ }
+
+ public void setAuths(Authorizations auths) {
+ this.auths = auths;
+ }
+
+ public void setRange(Range range) {
+ this.range = range;
+ }
+
+ public Boolean usesLocalIterators() {
+ return localIterators;
+ }
+
+ public void setUsesLocalIterators(Boolean localIterators) {
+ this.localIterators = localIterators;
+ }
+
+ public Set<Pair<Text,Text>> getFetchedColumns() {
+ return fetchedColumns;
+ }
+
+ public void setFetchedColumns(Collection<Pair<Text,Text>> fetchedColumns) {
+ this.fetchedColumns = new HashSet<Pair<Text,Text>>();
+ for (Pair<Text,Text> columns : fetchedColumns) {
+ this.fetchedColumns.add(columns);
+ }
+ }
+
+ public void setFetchedColumns(Set<Pair<Text,Text>> fetchedColumns) {
+ this.fetchedColumns = fetchedColumns;
+ }
+
+ public List<IteratorSetting> getIterators() {
+ return iterators;
+ }
+
+ public void setIterators(List<IteratorSetting> iterators) {
+ this.iterators = iterators;
+ }
+
+ public Level getLogLevel() {
+ return level;
+ }
+
+ public void setLogLevel(Level level) {
+ this.level = level;
+ }
+}
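For orientation, the getters above are what a record reader consults when rebuilding a client connection from a deserialized split. The following is a minimal sketch of that flow; it assumes the enclosing class is the mapreduce RangeInputSplit (the class declaration and its package are outside this hunk), and the helper name SplitScannerSketch is purely illustrative.

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.mapreduce.RangeInputSplit;

public class SplitScannerSketch {
  public static Scanner openScanner(RangeInputSplit split) throws Exception {
    // MockInstance or ZooKeeperInstance, depending on how the split was configured.
    Instance instance = split.getInstance();
    Connector conn = instance.getConnector(split.getPrincipal(), split.getToken());
    // Scan the split's table with the authorizations carried by the split.
    return conn.createScanner(split.getTableName(), split.getAuths());
  }
}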
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBase.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBase.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBase.java
new file mode 100644
index 0000000..4610556
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/ConfiguratorBase.java
@@ -0,0 +1,369 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce.lib.impl;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
+import org.apache.accumulo.core.security.Credentials;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+/**
+ * Base class providing static helpers that store and retrieve Accumulo connector, instance, and logging settings in a Hadoop {@link Configuration}.
+ *
+ * @since 1.6.0
+ */
+public class ConfiguratorBase {
+
+ /**
+ * Configuration keys for {@link Instance#getConnector(String, AuthenticationToken)}.
+ *
+ * @since 1.6.0
+ */
+ public static enum ConnectorInfo {
+ IS_CONFIGURED, PRINCIPAL, TOKEN,
+ }
+
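+ /**
+ * Indicates whether the {@link AuthenticationToken} is serialized inline in the configuration or read from a token file in the distributed cache.
+ *
+ * @since 1.6.0
+ */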
+ public static enum TokenSource {
+ FILE, INLINE;
+
+ private String prefix;
+
+ private TokenSource() {
+ prefix = name().toLowerCase() + ":";
+ }
+
+ public String prefix() {
+ return prefix;
+ }
+ }
+
+ /**
+ * Configuration keys for {@link Instance}, {@link ZooKeeperInstance}, and {@link MockInstance}.
+ *
+ * @since 1.6.0
+ */
+ public static enum InstanceOpts {
+ TYPE, NAME, ZOO_KEEPERS, CLIENT_CONFIG;
+ }
+
+ /**
+ * Configuration keys for general configuration options.
+ *
+ * @since 1.6.0
+ */
+ public static enum GeneralOpts {
+ LOG_LEVEL
+ }
+
+ /**
+ * Provides a configuration key for a given feature enum, prefixed by the implementingClass
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param e
+ * the enum used to provide the unique part of the configuration key
+ * @return the configuration key
+ * @since 1.6.0
+ */
+ protected static String enumToConfKey(Class<?> implementingClass, Enum<?> e) {
+ return implementingClass.getSimpleName() + "." + e.getDeclaringClass().getSimpleName() + "." + StringUtils.camelize(e.name().toLowerCase());
+ }
+
+ /**
+ * Sets the connector information needed to communicate with Accumulo in this job.
+ *
+ * <p>
+ * <b>WARNING:</b> The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset-safe
+ * conversion to a string, and is not intended to be secure.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param principal
+ * a valid Accumulo user name
+ * @param token
+ * the user's authentication token
+ * @since 1.6.0
+ */
+ public static void setConnectorInfo(Class<?> implementingClass, Configuration conf, String principal, AuthenticationToken token)
+ throws AccumuloSecurityException {
+ if (isConnectorInfoSet(implementingClass, conf))
+ throw new IllegalStateException("Connector info for " + implementingClass.getSimpleName() + " can only be set once per job");
+
+ checkArgument(principal != null, "principal is null");
+ checkArgument(token != null, "token is null");
+ conf.setBoolean(enumToConfKey(implementingClass, ConnectorInfo.IS_CONFIGURED), true);
+ conf.set(enumToConfKey(implementingClass, ConnectorInfo.PRINCIPAL), principal);
+ conf.set(enumToConfKey(implementingClass, ConnectorInfo.TOKEN),
+ TokenSource.INLINE.prefix() + token.getClass().getName() + ":" + Base64.encodeBase64String(AuthenticationTokenSerializer.serialize(token)));
+ }
+
+ /**
+ * Sets the connector information needed to communicate with Accumulo in this job.
+ *
+ * <p>
+ * Pulls a token file, which contains the authentication token, into the Distributed Cache; this is intended to be more secure than storing the token
+ * directly in the Configuration. The token file can be created with "bin/accumulo create-token".
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param principal
+ * a valid Accumulo user name
+ * @param tokenFile
+ * the path to the token file in DFS
+ * @since 1.6.0
+ */
+ public static void setConnectorInfo(Class<?> implementingClass, Configuration conf, String principal, String tokenFile) throws AccumuloSecurityException {
+ if (isConnectorInfoSet(implementingClass, conf))
+ throw new IllegalStateException("Connector info for " + implementingClass.getSimpleName() + " can only be set once per job");
+
+ checkArgument(principal != null, "principal is null");
+ checkArgument(tokenFile != null, "tokenFile is null");
+
+ try {
+ DistributedCacheHelper.addCacheFile(new URI(tokenFile), conf);
+ } catch (URISyntaxException e) {
+ throw new IllegalStateException("Unable to add tokenFile \"" + tokenFile + "\" to distributed cache.");
+ }
+
+ conf.setBoolean(enumToConfKey(implementingClass, ConnectorInfo.IS_CONFIGURED), true);
+ conf.set(enumToConfKey(implementingClass, ConnectorInfo.PRINCIPAL), principal);
+ conf.set(enumToConfKey(implementingClass, ConnectorInfo.TOKEN), TokenSource.FILE.prefix() + tokenFile);
+ }
+
+ /**
+ * Determines if the connector info has already been set for this instance.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return true if the connector info has already been set, false otherwise
+ * @since 1.6.0
+ * @see #setConnectorInfo(Class, Configuration, String, AuthenticationToken)
+ */
+ public static Boolean isConnectorInfoSet(Class<?> implementingClass, Configuration conf) {
+ return conf.getBoolean(enumToConfKey(implementingClass, ConnectorInfo.IS_CONFIGURED), false);
+ }
+
+ /**
+ * Gets the user name from the configuration.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return the principal
+ * @since 1.6.0
+ * @see #setConnectorInfo(Class, Configuration, String, AuthenticationToken)
+ */
+ public static String getPrincipal(Class<?> implementingClass, Configuration conf) {
+ return conf.get(enumToConfKey(implementingClass, ConnectorInfo.PRINCIPAL));
+ }
+
+ /**
+ * Gets the authentication token from either the specified token file or directly from the configuration, whichever was used when the job was configured.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return the principal's authentication token
+ * @since 1.6.0
+ * @see #setConnectorInfo(Class, Configuration, String, AuthenticationToken)
+ * @see #setConnectorInfo(Class, Configuration, String, String)
+ */
+ public static AuthenticationToken getAuthenticationToken(Class<?> implementingClass, Configuration conf) {
+ String token = conf.get(enumToConfKey(implementingClass, ConnectorInfo.TOKEN));
+ if (token == null || token.isEmpty())
+ return null;
+ if (token.startsWith(TokenSource.INLINE.prefix())) {
+ String[] args = token.substring(TokenSource.INLINE.prefix().length()).split(":", 2);
+ if (args.length == 2)
+ return AuthenticationTokenSerializer.deserialize(args[0], Base64.decodeBase64(args[1].getBytes(StandardCharsets.UTF_8)));
+ } else if (token.startsWith(TokenSource.FILE.prefix())) {
+ String tokenFileName = token.substring(TokenSource.FILE.prefix().length());
+ return getTokenFromFile(conf, getPrincipal(implementingClass, conf), tokenFileName);
+ }
+
+ throw new IllegalStateException("Token was not properly serialized into the configuration");
+ }
+
+ /**
+ * Reads the authentication token from the token file in the distributed cache. Each line of the token file stores serialized credentials in the form
+ * principal:token_class:token.
+ *
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param principal
+ * a valid Accumulo user name
+ * @param tokenFile
+ * the path to the token file in DFS
+ * @return the principal's authentication token
+ * @since 1.6.0
+ * @see #setConnectorInfo(Class, Configuration, String, String)
+ */
+ public static AuthenticationToken getTokenFromFile(Configuration conf, String principal, String tokenFile) {
+ FSDataInputStream in = null;
+ try {
+ URI[] uris = DistributedCacheHelper.getCacheFiles(conf);
+ Path path = null;
+ for (URI u : uris) {
+ if (u.toString().equals(tokenFile)) {
+ path = new Path(u);
+ }
+ }
+ if (path == null) {
+ throw new IllegalArgumentException("Couldn't find password file called \"" + tokenFile + "\" in cache.");
+ }
+ FileSystem fs = FileSystem.get(conf);
+ in = fs.open(path);
+ } catch (IOException e) {
+ throw new IllegalArgumentException("Couldn't open password file called \"" + tokenFile + "\".");
+ }
+ try (java.util.Scanner fileScanner = new java.util.Scanner(in)) {
+ while (fileScanner.hasNextLine()) {
+ Credentials creds = Credentials.deserialize(fileScanner.nextLine());
+ if (principal.equals(creds.getPrincipal())) {
+ return creds.getToken();
+ }
+ }
+ throw new IllegalArgumentException("Couldn't find token for user \"" + principal + "\" in file \"" + tokenFile + "\"");
+ }
+ }
+
+ /**
+ * Configures a {@link ZooKeeperInstance} for this job.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param clientConfig
+ * client configuration for specifying connection timeouts, SSL connection options, etc.
+ * @since 1.6.0
+ */
+ public static void setZooKeeperInstance(Class<?> implementingClass, Configuration conf, ClientConfiguration clientConfig) {
+ String key = enumToConfKey(implementingClass, InstanceOpts.TYPE);
+ if (!conf.get(key, "").isEmpty())
+ throw new IllegalStateException("Instance info can only be set once per job; it has already been configured with " + conf.get(key));
+ conf.set(key, "ZooKeeperInstance");
+ if (clientConfig != null) {
+ conf.set(enumToConfKey(implementingClass, InstanceOpts.CLIENT_CONFIG), clientConfig.serialize());
+ }
+ }
+
+ /**
+ * Configures a {@link MockInstance} for this job.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param instanceName
+ * the Accumulo instance name
+ * @since 1.6.0
+ */
+ public static void setMockInstance(Class<?> implementingClass, Configuration conf, String instanceName) {
+ String key = enumToConfKey(implementingClass, InstanceOpts.TYPE);
+ if (!conf.get(key, "").isEmpty())
+ throw new IllegalStateException("Instance info can only be set once per job; it has already been configured with " + conf.get(key));
+ conf.set(key, "MockInstance");
+
+ checkArgument(instanceName != null, "instanceName is null");
+ conf.set(enumToConfKey(implementingClass, InstanceOpts.NAME), instanceName);
+ }
+
+ /**
+ * Initializes an Accumulo {@link Instance} based on the configuration.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return an Accumulo instance
+ * @since 1.6.0
+ * @see #setZooKeeperInstance(Class, Configuration, ClientConfiguration)
+ * @see #setMockInstance(Class, Configuration, String)
+ */
+ public static Instance getInstance(Class<?> implementingClass, Configuration conf) {
+ String instanceType = conf.get(enumToConfKey(implementingClass, InstanceOpts.TYPE), "");
+ if ("MockInstance".equals(instanceType))
+ return new MockInstance(conf.get(enumToConfKey(implementingClass, InstanceOpts.NAME)));
+ else if ("ZooKeeperInstance".equals(instanceType)) {
+ String clientConfigString = conf.get(enumToConfKey(implementingClass, InstanceOpts.CLIENT_CONFIG));
+ if (clientConfigString == null) {
+ String instanceName = conf.get(enumToConfKey(implementingClass, InstanceOpts.NAME));
+ String zookeepers = conf.get(enumToConfKey(implementingClass, InstanceOpts.ZOO_KEEPERS));
+ return new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(instanceName).withZkHosts(zookeepers));
+ } else {
+ return new ZooKeeperInstance(ClientConfiguration.deserialize(clientConfigString));
+ }
+ } else if (instanceType.isEmpty())
+ throw new IllegalStateException("Instance has not been configured for " + implementingClass.getSimpleName());
+ else
+ throw new IllegalStateException("Unrecognized instance type " + instanceType);
+ }
+
+ /**
+ * Sets the log level for this job.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @param level
+ * the logging level
+ * @since 1.6.0
+ */
+ public static void setLogLevel(Class<?> implementingClass, Configuration conf, Level level) {
+ checkArgument(level != null, "level is null");
+ Logger.getLogger(implementingClass).setLevel(level);
+ conf.setInt(enumToConfKey(implementingClass, GeneralOpts.LOG_LEVEL), level.toInt());
+ }
+
+ /**
+ * Gets the log level from this configuration.
+ *
+ * @param implementingClass
+ * the class whose name will be used as a prefix for the property configuration key
+ * @param conf
+ * the Hadoop configuration object to configure
+ * @return the log level
+ * @since 1.6.0
+ * @see #setLogLevel(Class, Configuration, Level)
+ */
+ public static Level getLogLevel(Class<?> implementingClass, Configuration conf) {
+ return Level.toLevel(conf.getInt(enumToConfKey(implementingClass, GeneralOpts.LOG_LEVEL), Level.INFO.toInt()));
+ }
+
+}
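A minimal sketch of how a format class built on ConfiguratorBase might store connector and instance information at submission time and resolve a Connector at task time. The class name MyInputFormat, the principal, and the token value are illustrative assumptions; the generated configuration keys take the form MyInputFormat.ConnectorInfo.Principal, per enumToConfKey above.

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.hadoop.conf.Configuration;

public class MyInputFormat {
  // Submission side: record who we are and which instance to talk to.
  public static void configure(Configuration conf) throws Exception {
    // Keys are prefixed with the class name, e.g. "MyInputFormat.ConnectorInfo.Principal".
    ConfiguratorBase.setConnectorInfo(MyInputFormat.class, conf, "root", new PasswordToken("secret"));
    ConfiguratorBase.setMockInstance(MyInputFormat.class, conf, "testInstance");
  }

  // Task side: rebuild a Connector from the same Configuration.
  public static Connector connect(Configuration conf) throws Exception {
    Instance instance = ConfiguratorBase.getInstance(MyInputFormat.class, conf);
    String principal = ConfiguratorBase.getPrincipal(MyInputFormat.class, conf);
    AuthenticationToken token = ConfiguratorBase.getAuthenticationToken(MyInputFormat.class, conf);
    return instance.getConnector(principal, token);
  }
}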
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/DistributedCacheHelper.java
----------------------------------------------------------------------
diff --git a/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/DistributedCacheHelper.java b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/DistributedCacheHelper.java
new file mode 100644
index 0000000..c694b9a
--- /dev/null
+++ b/mapreduce/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/DistributedCacheHelper.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce.lib.impl;
+
+import java.io.IOException;
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Thin wrapper around the deprecated Hadoop {@link DistributedCache} API, isolating the deprecation warnings to a single class.
+ *
+ * @since 1.6.0
+ */
+@SuppressWarnings("deprecation")
+public class DistributedCacheHelper {
+
+ /**
+ * @since 1.6.0
+ */
+ public static void addCacheFile(URI uri, Configuration conf) {
+ DistributedCache.addCacheFile(uri, conf);
+ }
+
+ /**
+ * @since 1.6.0
+ */
+ public static URI[] getCacheFiles(Configuration conf) throws IOException {
+ return DistributedCache.getCacheFiles(conf);
+ }
+
+ /**
+ * @since 1.6.0
+ */
+ public static Path[] getLocalCacheFiles(Configuration conf) throws IOException {
+ return DistributedCache.getLocalCacheFiles(conf);
+ }
+}
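The token-file variant of connector info follows the same pattern, with the file resolved through the distributed cache helper above. A sketch, assuming the token file already exists in DFS (created with "bin/accumulo create-token"); the path and class name TokenFileSketch are placeholders.

import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.hadoop.conf.Configuration;

public class TokenFileSketch {
  public static AuthenticationToken readBack(Configuration conf) throws Exception {
    // Stores "file:/user/root/root.token" under the TOKEN key and adds the file
    // to the distributed cache through DistributedCacheHelper.addCacheFile().
    ConfiguratorBase.setConnectorInfo(TokenFileSketch.class, conf, "root", "/user/root/root.token");
    // Sees the "file:" prefix and resolves the token through getTokenFromFile(),
    // which locates the cached file via DistributedCacheHelper.getCacheFiles().
    return ConfiguratorBase.getAuthenticationToken(TokenFileSketch.class, conf);
  }
}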
[08/12] ACCUMULO-1880 create mapreduce module
Posted by md...@apache.org.
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormatTest.java
deleted file mode 100644
index 36054c8..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloOutputFormatTest.java
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Map.Entry;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Test;
-
-/**
- *
- */
-public class AccumuloOutputFormatTest {
- private static AssertionError e1 = null;
- private static final String PREFIX = AccumuloOutputFormatTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
- private static final String TEST_TABLE_1 = PREFIX + "_mapred_table_1";
- private static final String TEST_TABLE_2 = PREFIX + "_mapred_table_2";
-
- private static class MRTester extends Configured implements Tool {
- private static class TestMapper implements Mapper<Key,Value,Text,Mutation> {
- Key key = null;
- int count = 0;
- OutputCollector<Text,Mutation> finalOutput;
-
- @Override
- public void map(Key k, Value v, OutputCollector<Text,Mutation> output, Reporter reporter) throws IOException {
- finalOutput = output;
- try {
- if (key != null)
- assertEquals(key.getRow().toString(), new String(v.get()));
- assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
- assertEquals(new String(v.get()), String.format("%09x", count));
- } catch (AssertionError e) {
- e1 = e;
- }
- key = new Key(k);
- count++;
- }
-
- @Override
- public void configure(JobConf job) {}
-
- @Override
- public void close() throws IOException {
- Mutation m = new Mutation("total");
- m.put("", "", Integer.toString(count));
- finalOutput.collect(new Text(), m);
- }
-
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 4) {
- throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <inputtable> <outputtable>");
- }
-
- String user = args[0];
- String pass = args[1];
- String table1 = args[2];
- String table2 = args[3];
-
- JobConf job = new JobConf(getConf());
- job.setJarByClass(this.getClass());
-
- job.setInputFormat(AccumuloInputFormat.class);
-
- AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloInputFormat.setInputTableName(job, table1);
- AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setMapperClass(TestMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormat(AccumuloOutputFormat.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Mutation.class);
-
- AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloOutputFormat.setCreateTables(job, false);
- AccumuloOutputFormat.setDefaultTableName(job, table2);
- AccumuloOutputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setNumReduceTasks(0);
-
- return JobClient.runJob(job).isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
- }
- }
-
- @Test
- public void testBWSettings() throws IOException {
- JobConf job = new JobConf();
-
- // make sure we aren't testing defaults
- final BatchWriterConfig bwDefaults = new BatchWriterConfig();
- assertNotEquals(7654321l, bwDefaults.getMaxLatency(TimeUnit.MILLISECONDS));
- assertNotEquals(9898989l, bwDefaults.getTimeout(TimeUnit.MILLISECONDS));
- assertNotEquals(42, bwDefaults.getMaxWriteThreads());
- assertNotEquals(1123581321l, bwDefaults.getMaxMemory());
-
- final BatchWriterConfig bwConfig = new BatchWriterConfig();
- bwConfig.setMaxLatency(7654321l, TimeUnit.MILLISECONDS);
- bwConfig.setTimeout(9898989l, TimeUnit.MILLISECONDS);
- bwConfig.setMaxWriteThreads(42);
- bwConfig.setMaxMemory(1123581321l);
- AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
-
- AccumuloOutputFormat myAOF = new AccumuloOutputFormat() {
- @Override
- public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
- BatchWriterConfig bwOpts = getBatchWriterOptions(job);
-
- // passive check
- assertEquals(bwConfig.getMaxLatency(TimeUnit.MILLISECONDS), bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
- assertEquals(bwConfig.getTimeout(TimeUnit.MILLISECONDS), bwOpts.getTimeout(TimeUnit.MILLISECONDS));
- assertEquals(bwConfig.getMaxWriteThreads(), bwOpts.getMaxWriteThreads());
- assertEquals(bwConfig.getMaxMemory(), bwOpts.getMaxMemory());
-
- // explicit check
- assertEquals(7654321l, bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
- assertEquals(9898989l, bwOpts.getTimeout(TimeUnit.MILLISECONDS));
- assertEquals(42, bwOpts.getMaxWriteThreads());
- assertEquals(1123581321l, bwOpts.getMaxMemory());
-
- }
- };
- myAOF.checkOutputSpecs(null, job);
- }
-
- @Test
- public void testMR() throws Exception {
- MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
- Connector c = mockInstance.getConnector("root", new PasswordToken(""));
- c.tableOperations().create(TEST_TABLE_1);
- c.tableOperations().create(TEST_TABLE_2);
- BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
- for (int i = 0; i < 100; i++) {
- Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
- m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
- bw.addMutation(m);
- }
- bw.close();
-
- MRTester.main(new String[] {"root", "", TEST_TABLE_1, TEST_TABLE_2});
- assertNull(e1);
-
- Scanner scanner = c.createScanner(TEST_TABLE_2, new Authorizations());
- Iterator<Entry<Key,Value>> iter = scanner.iterator();
- assertTrue(iter.hasNext());
- Entry<Key,Value> entry = iter.next();
- assertEquals(Integer.parseInt(new String(entry.getValue().get())), 100);
- assertFalse(iter.hasNext());
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormatTest.java
deleted file mode 100644
index a0ae0b3..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloRowInputFormatTest.java
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.MutationsRejectedException;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.KeyValue;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.ColumnVisibility;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.accumulo.core.util.PeekingIterator;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Test;
-
-public class AccumuloRowInputFormatTest {
- private static final String PREFIX = AccumuloRowInputFormatTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
- private static final String TEST_TABLE_1 = PREFIX + "_mapred_table_1";
-
- private static final String ROW1 = "row1";
- private static final String ROW2 = "row2";
- private static final String ROW3 = "row3";
- private static final String COLF1 = "colf1";
- private static List<Entry<Key,Value>> row1;
- private static List<Entry<Key,Value>> row2;
- private static List<Entry<Key,Value>> row3;
- private static AssertionError e1 = null;
- private static AssertionError e2 = null;
-
- public AccumuloRowInputFormatTest() {
- row1 = new ArrayList<Entry<Key,Value>>();
- row1.add(new KeyValue(new Key(ROW1, COLF1, "colq1"), "v1".getBytes()));
- row1.add(new KeyValue(new Key(ROW1, COLF1, "colq2"), "v2".getBytes()));
- row1.add(new KeyValue(new Key(ROW1, "colf2", "colq3"), "v3".getBytes()));
- row2 = new ArrayList<Entry<Key,Value>>();
- row2.add(new KeyValue(new Key(ROW2, COLF1, "colq4"), "v4".getBytes()));
- row3 = new ArrayList<Entry<Key,Value>>();
- row3.add(new KeyValue(new Key(ROW3, COLF1, "colq5"), "v5".getBytes()));
- }
-
- public static void checkLists(final List<Entry<Key,Value>> first, final List<Entry<Key,Value>> second) {
- assertEquals("Sizes should be the same.", first.size(), second.size());
- for (int i = 0; i < first.size(); i++) {
- assertEquals("Keys should be equal.", first.get(i).getKey(), second.get(i).getKey());
- assertEquals("Values should be equal.", first.get(i).getValue(), second.get(i).getValue());
- }
- }
-
- public static void checkLists(final List<Entry<Key,Value>> first, final Iterator<Entry<Key,Value>> second) {
- int entryIndex = 0;
- while (second.hasNext()) {
- final Entry<Key,Value> entry = second.next();
- assertEquals("Keys should be equal", first.get(entryIndex).getKey(), entry.getKey());
- assertEquals("Values should be equal", first.get(entryIndex).getValue(), entry.getValue());
- entryIndex++;
- }
- }
-
- public static void insertList(final BatchWriter writer, final List<Entry<Key,Value>> list) throws MutationsRejectedException {
- for (Entry<Key,Value> e : list) {
- final Key key = e.getKey();
- final Mutation mutation = new Mutation(key.getRow());
- ColumnVisibility colVisibility = new ColumnVisibility(key.getColumnVisibility());
- mutation.put(key.getColumnFamily(), key.getColumnQualifier(), colVisibility, key.getTimestamp(), e.getValue());
- writer.addMutation(mutation);
- }
- }
-
- private static class MRTester extends Configured implements Tool {
- public static class TestMapper implements Mapper<Text,PeekingIterator<Entry<Key,Value>>,Key,Value> {
- int count = 0;
-
- @Override
- public void map(Text k, PeekingIterator<Entry<Key,Value>> v, OutputCollector<Key,Value> output, Reporter reporter) throws IOException {
- try {
- switch (count) {
- case 0:
- assertEquals("Current key should be " + ROW1, new Text(ROW1), k);
- checkLists(row1, v);
- break;
- case 1:
- assertEquals("Current key should be " + ROW2, new Text(ROW2), k);
- checkLists(row2, v);
- break;
- case 2:
- assertEquals("Current key should be " + ROW3, new Text(ROW3), k);
- checkLists(row3, v);
- break;
- default:
- assertTrue(false);
- }
- } catch (AssertionError e) {
- e1 = e;
- }
- count++;
- }
-
- @Override
- public void configure(JobConf job) {}
-
- @Override
- public void close() throws IOException {
- try {
- assertEquals(3, count);
- } catch (AssertionError e) {
- e2 = e;
- }
- }
-
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 3) {
- throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table>");
- }
-
- String user = args[0];
- String pass = args[1];
- String table = args[2];
-
- JobConf job = new JobConf(getConf());
- job.setJarByClass(this.getClass());
-
- job.setInputFormat(AccumuloRowInputFormat.class);
-
- AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloInputFormat.setInputTableName(job, table);
- AccumuloRowInputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setMapperClass(TestMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormat(NullOutputFormat.class);
-
- job.setNumReduceTasks(0);
-
- return JobClient.runJob(job).isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
- }
- }
-
- @Test
- public void test() throws Exception {
- final MockInstance instance = new MockInstance(INSTANCE_NAME);
- final Connector conn = instance.getConnector("root", new PasswordToken(""));
- conn.tableOperations().create(TEST_TABLE_1);
- BatchWriter writer = null;
- try {
- writer = conn.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
- insertList(writer, row1);
- insertList(writer, row2);
- insertList(writer, row3);
- } finally {
- if (writer != null) {
- writer.close();
- }
- }
- MRTester.main(new String[] {"root", "", TEST_TABLE_1});
- assertNull(e1);
- assertNull(e2);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapred/TokenFileTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapred/TokenFileTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapred/TokenFileTest.java
deleted file mode 100644
index 0e1fe39..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapred/TokenFileTest.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapred;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Iterator;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.security.Credentials;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-/**
- *
- */
-public class TokenFileTest {
- private static AssertionError e1 = null;
- private static final String PREFIX = TokenFileTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapred_instance";
- private static final String TEST_TABLE_1 = PREFIX + "_mapred_table_1";
- private static final String TEST_TABLE_2 = PREFIX + "_mapred_table_2";
-
- private static class MRTokenFileTester extends Configured implements Tool {
- private static class TestMapper implements Mapper<Key,Value,Text,Mutation> {
- Key key = null;
- int count = 0;
- OutputCollector<Text,Mutation> finalOutput;
-
- @Override
- public void map(Key k, Value v, OutputCollector<Text,Mutation> output, Reporter reporter) throws IOException {
- finalOutput = output;
- try {
- if (key != null)
- assertEquals(key.getRow().toString(), new String(v.get()));
- assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
- assertEquals(new String(v.get()), String.format("%09x", count));
- } catch (AssertionError e) {
- e1 = e;
- }
- key = new Key(k);
- count++;
- }
-
- @Override
- public void configure(JobConf job) {}
-
- @Override
- public void close() throws IOException {
- Mutation m = new Mutation("total");
- m.put("", "", Integer.toString(count));
- finalOutput.collect(new Text(), m);
- }
-
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 4) {
- throw new IllegalArgumentException("Usage : " + MRTokenFileTester.class.getName() + " <user> <token file> <inputtable> <outputtable>");
- }
-
- String user = args[0];
- String tokenFile = args[1];
- String table1 = args[2];
- String table2 = args[3];
-
- JobConf job = new JobConf(getConf());
- job.setJarByClass(this.getClass());
-
- job.setInputFormat(AccumuloInputFormat.class);
-
- AccumuloInputFormat.setConnectorInfo(job, user, tokenFile);
- AccumuloInputFormat.setInputTableName(job, table1);
- AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setMapperClass(TestMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormat(AccumuloOutputFormat.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Mutation.class);
-
- AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
- AccumuloOutputFormat.setCreateTables(job, false);
- AccumuloOutputFormat.setDefaultTableName(job, table2);
- AccumuloOutputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setNumReduceTasks(0);
-
- return JobClient.runJob(job).isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- Configuration conf = CachedConfiguration.getInstance();
- conf.set("hadoop.tmp.dir", new File(args[1]).getParent());
- assertEquals(0, ToolRunner.run(conf, new MRTokenFileTester(), args));
- }
- }
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
-
- @Test
- public void testMR() throws Exception {
- MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
- Connector c = mockInstance.getConnector("root", new PasswordToken(""));
- c.tableOperations().create(TEST_TABLE_1);
- c.tableOperations().create(TEST_TABLE_2);
- BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
- for (int i = 0; i < 100; i++) {
- Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
- m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
- bw.addMutation(m);
- }
- bw.close();
-
- File tf = folder.newFile("root_test.pw");
- PrintStream out = new PrintStream(tf);
- String outString = new Credentials("root", new PasswordToken("")).serialize();
- out.println(outString);
- out.close();
-
- MRTokenFileTester.main(new String[] {"root", tf.getAbsolutePath(), TEST_TABLE_1, TEST_TABLE_2});
- assertNull(e1);
-
- Scanner scanner = c.createScanner(TEST_TABLE_2, new Authorizations());
- Iterator<Entry<Key,Value>> iter = scanner.iterator();
- assertTrue(iter.hasNext());
- Entry<Key,Value> entry = iter.next();
- assertEquals(Integer.parseInt(new String(entry.getValue().get())), 100);
- assertFalse(iter.hasNext());
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java
deleted file mode 100644
index 2a453e3..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.FileFilter;
-import java.io.IOException;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.conf.AccumuloConfiguration;
-import org.apache.accumulo.core.conf.Property;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class AccumuloFileOutputFormatTest {
- private static final String PREFIX = AccumuloFileOutputFormatTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
- private static final String BAD_TABLE = PREFIX + "_mapreduce_bad_table";
- private static final String TEST_TABLE = PREFIX + "_mapreduce_test_table";
- private static final String EMPTY_TABLE = PREFIX + "_mapreduce_empty_table";
-
- private static AssertionError e1 = null;
- private static AssertionError e2 = null;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder(new File(System.getProperty("user.dir") + "/target"));
-
- @BeforeClass
- public static void setup() throws Exception {
- MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
- Connector c = mockInstance.getConnector("root", new PasswordToken(""));
- c.tableOperations().create(EMPTY_TABLE);
- c.tableOperations().create(TEST_TABLE);
- c.tableOperations().create(BAD_TABLE);
- BatchWriter bw = c.createBatchWriter(TEST_TABLE, new BatchWriterConfig());
- Mutation m = new Mutation("Key");
- m.put("", "", "");
- bw.addMutation(m);
- bw.close();
- bw = c.createBatchWriter(BAD_TABLE, new BatchWriterConfig());
- m = new Mutation("r1");
- m.put("cf1", "cq1", "A&B");
- m.put("cf1", "cq1", "A&B");
- m.put("cf1", "cq2", "A&");
- bw.addMutation(m);
- bw.close();
- }
-
- @Test
- public void testEmptyWrite() throws Exception {
- handleWriteTests(false);
- }
-
- @Test
- public void testRealWrite() throws Exception {
- handleWriteTests(true);
- }
-
- private static class MRTester extends Configured implements Tool {
- private static class BadKeyMapper extends Mapper<Key,Value,Key,Value> {
- int index = 0;
-
- @Override
- protected void map(Key key, Value value, Context context) throws IOException, InterruptedException {
- try {
- try {
- context.write(key, value);
- if (index == 2)
- assertTrue(false);
- } catch (Exception e) {
- assertEquals(2, index);
- }
- } catch (AssertionError e) {
- e1 = e;
- }
- index++;
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- try {
- assertEquals(2, index);
- } catch (AssertionError e) {
- e2 = e;
- }
- }
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 4) {
- throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table> <outputfile>");
- }
-
- String user = args[0];
- String pass = args[1];
- String table = args[2];
-
- @SuppressWarnings("deprecation")
- Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
- job.setJarByClass(this.getClass());
-
- job.setInputFormatClass(AccumuloInputFormat.class);
-
- AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloInputFormat.setInputTableName(job, table);
- AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
- AccumuloFileOutputFormat.setOutputPath(job, new Path(args[3]));
-
- job.setMapperClass(BAD_TABLE.equals(table) ? BadKeyMapper.class : Mapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormatClass(AccumuloFileOutputFormat.class);
-
- job.setNumReduceTasks(0);
-
- job.waitForCompletion(true);
-
- return job.isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
- }
- }
-
- public void handleWriteTests(boolean content) throws Exception {
- File f = folder.newFile("handleWriteTests");
- f.delete();
- MRTester.main(new String[] {"root", "", content ? TEST_TABLE : EMPTY_TABLE, f.getAbsolutePath()});
-
- assertTrue(f.exists());
- File[] files = f.listFiles(new FileFilter() {
- @Override
- public boolean accept(File file) {
- return file.getName().startsWith("part-m-");
- }
- });
- if (content) {
- assertEquals(1, files.length);
- assertTrue(files[0].exists());
- } else {
- assertEquals(0, files.length);
- }
- }
-
- @Test
- public void writeBadVisibility() throws Exception {
- File f = folder.newFile("writeBadVisibility");
- f.delete();
- MRTester.main(new String[] {"root", "", BAD_TABLE, f.getAbsolutePath()});
- assertNull(e1);
- assertNull(e2);
- }
-
- @Test
- public void validateConfiguration() throws IOException, InterruptedException {
-
- int a = 7;
- long b = 300l;
- long c = 50l;
- long d = 10l;
- String e = "snappy";
-
- @SuppressWarnings("deprecation")
- Job job1 = new Job();
- AccumuloFileOutputFormat.setReplication(job1, a);
- AccumuloFileOutputFormat.setFileBlockSize(job1, b);
- AccumuloFileOutputFormat.setDataBlockSize(job1, c);
- AccumuloFileOutputFormat.setIndexBlockSize(job1, d);
- AccumuloFileOutputFormat.setCompressionType(job1, e);
-
- AccumuloConfiguration acuconf = AccumuloFileOutputFormat.getAccumuloConfiguration(job1);
-
- assertEquals(7, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
- assertEquals(300l, acuconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE));
- assertEquals(50l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
- assertEquals(10l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
- assertEquals("snappy", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
-
- a = 17;
- b = 1300l;
- c = 150l;
- d = 110l;
- e = "lzo";
-
- @SuppressWarnings("deprecation")
- Job job2 = new Job();
- AccumuloFileOutputFormat.setReplication(job2, a);
- AccumuloFileOutputFormat.setFileBlockSize(job2, b);
- AccumuloFileOutputFormat.setDataBlockSize(job2, c);
- AccumuloFileOutputFormat.setIndexBlockSize(job2, d);
- AccumuloFileOutputFormat.setCompressionType(job2, e);
-
- acuconf = AccumuloFileOutputFormat.getAccumuloConfiguration(job2);
-
- assertEquals(17, acuconf.getCount(Property.TABLE_FILE_REPLICATION));
- assertEquals(1300l, acuconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE));
- assertEquals(150l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE));
- assertEquals(110l, acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX));
- assertEquals("lzo", acuconf.get(Property.TABLE_FILE_COMPRESSION_TYPE));
-
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java
deleted file mode 100644
index 2500972..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.ByteArrayOutputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Instance;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.user.RegExFilter;
-import org.apache.accumulo.core.iterators.user.WholeRowIterator;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.commons.codec.binary.Base64;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.log4j.Level;
-import org.junit.Assert;
-import org.junit.Test;
-
-public class AccumuloInputFormatTest {
-
- private static final String PREFIX = AccumuloInputFormatTest.class.getSimpleName();
-
- /**
- * Check that the iterator configuration is getting stored in the Job conf correctly.
- */
- @Test
- public void testSetIterator() throws IOException {
- @SuppressWarnings("deprecation")
- Job job = new Job();
-
- IteratorSetting is = new IteratorSetting(1, "WholeRow", "org.apache.accumulo.core.iterators.WholeRowIterator");
- AccumuloInputFormat.addIterator(job, is);
- Configuration conf = job.getConfiguration();
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- is.write(new DataOutputStream(baos));
- String iterators = conf.get("AccumuloInputFormat.ScanOpts.Iterators");
- assertEquals(new String(Base64.encodeBase64(baos.toByteArray())), iterators);
- }
-
- @Test
- public void testAddIterator() throws IOException {
- @SuppressWarnings("deprecation")
- Job job = new Job();
-
- AccumuloInputFormat.addIterator(job, new IteratorSetting(1, "WholeRow", WholeRowIterator.class));
- AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
- IteratorSetting iter = new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator");
- iter.addOption("v1", "1");
- iter.addOption("junk", "\0omg:!\\xyzzy");
- AccumuloInputFormat.addIterator(job, iter);
-
- List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
-
- // Check the list size
- assertTrue(list.size() == 3);
-
- // Walk the list and make sure our settings are correct
- IteratorSetting setting = list.get(0);
- assertEquals(1, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.user.WholeRowIterator", setting.getIteratorClass());
- assertEquals("WholeRow", setting.getName());
- assertEquals(0, setting.getOptions().size());
-
- setting = list.get(1);
- assertEquals(2, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
- assertEquals("Versions", setting.getName());
- assertEquals(0, setting.getOptions().size());
-
- setting = list.get(2);
- assertEquals(3, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
- assertEquals("Count", setting.getName());
- assertEquals(2, setting.getOptions().size());
- assertEquals("1", setting.getOptions().get("v1"));
- assertEquals("\0omg:!\\xyzzy", setting.getOptions().get("junk"));
- }
-
- /**
- * Test adding iterator options where the keys and values contain both the FIELD_SEPARATOR character (':') and ITERATOR_SEPARATOR (',') characters. There
- * should be no exceptions thrown when trying to parse these types of option entries.
- *
- * This test makes sure that the expected raw values, as appears in the Job, are equal to what's expected.
- */
- @Test
- public void testIteratorOptionEncoding() throws Throwable {
- String key = "colon:delimited:key";
- String value = "comma,delimited,value";
- IteratorSetting someSetting = new IteratorSetting(1, "iterator", "Iterator.class");
- someSetting.addOption(key, value);
- @SuppressWarnings("deprecation")
- Job job = new Job();
- AccumuloInputFormat.addIterator(job, someSetting);
-
- List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
- assertEquals(1, list.size());
- assertEquals(1, list.get(0).getOptions().size());
- assertEquals(list.get(0).getOptions().get(key), value);
-
- someSetting.addOption(key + "2", value);
- someSetting.setPriority(2);
- someSetting.setName("it2");
- AccumuloInputFormat.addIterator(job, someSetting);
- list = AccumuloInputFormat.getIterators(job);
- assertEquals(2, list.size());
- assertEquals(1, list.get(0).getOptions().size());
- assertEquals(list.get(0).getOptions().get(key), value);
- assertEquals(2, list.get(1).getOptions().size());
- assertEquals(list.get(1).getOptions().get(key), value);
- assertEquals(list.get(1).getOptions().get(key + "2"), value);
- }
-
- /**
- * Test retrieving iterator settings when multiple iterators have been set.
- */
- @Test
- public void testGetIteratorSettings() throws IOException {
- @SuppressWarnings("deprecation")
- Job job = new Job();
-
- AccumuloInputFormat.addIterator(job, new IteratorSetting(1, "WholeRow", "org.apache.accumulo.core.iterators.WholeRowIterator"));
- AccumuloInputFormat.addIterator(job, new IteratorSetting(2, "Versions", "org.apache.accumulo.core.iterators.VersioningIterator"));
- AccumuloInputFormat.addIterator(job, new IteratorSetting(3, "Count", "org.apache.accumulo.core.iterators.CountingIterator"));
-
- List<IteratorSetting> list = AccumuloInputFormat.getIterators(job);
-
- // Check the list size
- assertTrue(list.size() == 3);
-
- // Walk the list and make sure our settings are correct
- IteratorSetting setting = list.get(0);
- assertEquals(1, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.WholeRowIterator", setting.getIteratorClass());
- assertEquals("WholeRow", setting.getName());
-
- setting = list.get(1);
- assertEquals(2, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.VersioningIterator", setting.getIteratorClass());
- assertEquals("Versions", setting.getName());
-
- setting = list.get(2);
- assertEquals(3, setting.getPriority());
- assertEquals("org.apache.accumulo.core.iterators.CountingIterator", setting.getIteratorClass());
- assertEquals("Count", setting.getName());
-
- }
-
- @Test
- public void testSetRegex() throws IOException {
- @SuppressWarnings("deprecation")
- Job job = new Job();
-
- String regex = ">\"*%<>\'\\";
-
- IteratorSetting is = new IteratorSetting(50, regex, RegExFilter.class);
- RegExFilter.setRegexs(is, regex, null, null, null, false);
- AccumuloInputFormat.addIterator(job, is);
-
- assertTrue(regex.equals(AccumuloInputFormat.getIterators(job).get(0).getName()));
- }
-
- private static AssertionError e1 = null;
- private static AssertionError e2 = null;
-
- private static class MRTester extends Configured implements Tool {
- private static class TestMapper extends Mapper<Key,Value,Key,Value> {
- Key key = null;
- int count = 0;
-
- @Override
- protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
- try {
- if (key != null)
- assertEquals(key.getRow().toString(), new String(v.get()));
- assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
- assertEquals(new String(v.get()), String.format("%09x", count));
- } catch (AssertionError e) {
- e1 = e;
- }
- key = new Key(k);
- count++;
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- try {
- assertEquals(100, count);
- } catch (AssertionError e) {
- e2 = e;
- }
- }
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 5) {
- throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table> <instanceName> <inputFormatClass>");
- }
-
- String user = args[0];
- String pass = args[1];
- String table = args[2];
-
- String instanceName = args[3];
- String inputFormatClassName = args[4];
- @SuppressWarnings("unchecked")
- Class<? extends InputFormat<?,?>> inputFormatClass = (Class<? extends InputFormat<?,?>>) Class.forName(inputFormatClassName);
-
- @SuppressWarnings("deprecation")
- Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
- job.setJarByClass(this.getClass());
-
- job.setInputFormatClass(inputFormatClass);
-
- AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloInputFormat.setInputTableName(job, table);
- AccumuloInputFormat.setMockInstance(job, instanceName);
-
- job.setMapperClass(TestMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormatClass(NullOutputFormat.class);
-
- job.setNumReduceTasks(0);
-
- job.waitForCompletion(true);
-
- return job.isSuccessful() ? 0 : 1;
- }
-
- public static int main(String[] args) throws Exception {
- return ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args);
- }
- }
-
- @Test
- public void testMap() throws Exception {
- final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
- final String TEST_TABLE_1 = PREFIX + "_mapreduce_table_1";
-
- MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
- Connector c = mockInstance.getConnector("root", new PasswordToken(""));
- c.tableOperations().create(TEST_TABLE_1);
- BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
- for (int i = 0; i < 100; i++) {
- Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
- m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
- bw.addMutation(m);
- }
- bw.close();
-
- Assert.assertEquals(0, MRTester.main(new String[] {"root", "", TEST_TABLE_1, INSTANCE_NAME, AccumuloInputFormat.class.getCanonicalName()}));
- assertNull(e1);
- assertNull(e2);
- }
-
- @Test
- public void testCorrectRangeInputSplits() throws Exception {
- @SuppressWarnings("deprecation")
- Job job = new Job(new Configuration(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
-
- String username = "user", table = "table", instance = "instance";
- PasswordToken password = new PasswordToken("password");
- Authorizations auths = new Authorizations("foo");
- Collection<Pair<Text,Text>> fetchColumns = Collections.singleton(new Pair<Text,Text>(new Text("foo"), new Text("bar")));
- boolean isolated = true, localIters = true;
- Level level = Level.WARN;
-
- Instance inst = new MockInstance(instance);
- Connector connector = inst.getConnector(username, password);
- connector.tableOperations().create(table);
-
- AccumuloInputFormat.setConnectorInfo(job, username, password);
- AccumuloInputFormat.setInputTableName(job, table);
- AccumuloInputFormat.setScanAuthorizations(job, auths);
- AccumuloInputFormat.setMockInstance(job, instance);
- AccumuloInputFormat.setScanIsolation(job, isolated);
- AccumuloInputFormat.setLocalIterators(job, localIters);
- AccumuloInputFormat.fetchColumns(job, fetchColumns);
- AccumuloInputFormat.setLogLevel(job, level);
-
- AccumuloInputFormat aif = new AccumuloInputFormat();
-
- List<InputSplit> splits = aif.getSplits(job);
-
- Assert.assertEquals(1, splits.size());
-
- InputSplit split = splits.get(0);
-
- Assert.assertEquals(RangeInputSplit.class, split.getClass());
-
- RangeInputSplit risplit = (RangeInputSplit) split;
-
- Assert.assertEquals(username, risplit.getPrincipal());
- Assert.assertEquals(table, risplit.getTableName());
- Assert.assertEquals(password, risplit.getToken());
- Assert.assertEquals(auths, risplit.getAuths());
- Assert.assertEquals(instance, risplit.getInstanceName());
- Assert.assertEquals(isolated, risplit.isIsolatedScan());
- Assert.assertEquals(localIters, risplit.usesLocalIterators());
- Assert.assertEquals(fetchColumns, risplit.getFetchedColumns());
- Assert.assertEquals(level, risplit.getLogLevel());
- }
-
- @Test
- public void testPartialInputSplitDelegationToConfiguration() throws Exception {
- String user = "testPartialInputSplitUser";
- PasswordToken password = new PasswordToken("");
-
- MockInstance mockInstance = new MockInstance("testPartialInputSplitDelegationToConfiguration");
- Connector c = mockInstance.getConnector(user, password);
- c.tableOperations().create("testtable");
- BatchWriter bw = c.createBatchWriter("testtable", new BatchWriterConfig());
- for (int i = 0; i < 100; i++) {
- Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
- m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
- bw.addMutation(m);
- }
- bw.close();
-
- Assert.assertEquals(
- 0,
- MRTester.main(new String[] {user, "", "testtable", "testPartialInputSplitDelegationToConfiguration",
- EmptySplitsAccumuloInputFormat.class.getCanonicalName()}));
- assertNull(e1);
- assertNull(e2);
- }
-
- @Test
- public void testPartialFailedInputSplitDelegationToConfiguration() throws Exception {
- String user = "testPartialFailedInputSplit";
- PasswordToken password = new PasswordToken("");
-
- MockInstance mockInstance = new MockInstance("testPartialFailedInputSplitDelegationToConfiguration");
- Connector c = mockInstance.getConnector(user, password);
- c.tableOperations().create("testtable");
- BatchWriter bw = c.createBatchWriter("testtable", new BatchWriterConfig());
- for (int i = 0; i < 100; i++) {
- Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
- m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
- bw.addMutation(m);
- }
- bw.close();
-
- // We should fail before we even get into the Mapper because we can't make the RecordReader
- Assert.assertEquals(
- 1,
- MRTester.main(new String[] {user, "", "testtable", "testPartialFailedInputSplitDelegationToConfiguration",
- BadPasswordSplitsAccumuloInputFormat.class.getCanonicalName()}));
- assertNull(e1);
- assertNull(e2);
- }
-
- @Test
- public void testEmptyColumnFamily() throws IOException {
- @SuppressWarnings("deprecation")
- Job job = new Job();
- Set<Pair<Text,Text>> cols = new HashSet<Pair<Text,Text>>();
- cols.add(new Pair<Text,Text>(new Text(""), null));
- cols.add(new Pair<Text,Text>(new Text("foo"), new Text("bar")));
- cols.add(new Pair<Text,Text>(new Text(""), new Text("bar")));
- cols.add(new Pair<Text,Text>(new Text(""), new Text("")));
- cols.add(new Pair<Text,Text>(new Text("foo"), new Text("")));
- AccumuloInputFormat.fetchColumns(job, cols);
- Set<Pair<Text,Text>> setCols = AccumuloInputFormat.getFetchedColumns(job);
- assertEquals(cols, setCols);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormatTest.java
deleted file mode 100644
index 05fbbb4..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormatTest.java
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Test;
-
-public class AccumuloMultiTableInputFormatTest {
-
- private static final String PREFIX = AccumuloMultiTableInputFormatTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
- private static final String TEST_TABLE_1 = PREFIX + "_mapreduce_table_1";
- private static final String TEST_TABLE_2 = PREFIX + "_mapreduce_table_2";
-
- private static AssertionError e1 = null;
- private static AssertionError e2 = null;
-
- private static class MRTester extends Configured implements Tool {
-
- private static class TestMapper extends Mapper<Key,Value,Key,Value> {
- Key key = null;
- int count = 0;
-
- @Override
- protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
- try {
- String tableName = ((RangeInputSplit) context.getInputSplit()).getTableName();
- if (key != null)
- assertEquals(key.getRow().toString(), new String(v.get()));
- assertEquals(new Text(String.format("%s_%09x", tableName, count + 1)), k.getRow());
- assertEquals(String.format("%s_%09x", tableName, count), new String(v.get()));
- } catch (AssertionError e) {
- e1 = e;
- }
- key = new Key(k);
- count++;
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- try {
- assertEquals(100, count);
- } catch (AssertionError e) {
- e2 = e;
- }
- }
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 4) {
- throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table1> <table2>");
- }
-
- String user = args[0];
- String pass = args[1];
- String table1 = args[2];
- String table2 = args[3];
-
- @SuppressWarnings("deprecation")
- Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
- job.setJarByClass(this.getClass());
-
- job.setInputFormatClass(AccumuloMultiTableInputFormat.class);
-
- AccumuloMultiTableInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
-
- InputTableConfig tableConfig1 = new InputTableConfig();
- InputTableConfig tableConfig2 = new InputTableConfig();
-
- Map<String,InputTableConfig> configMap = new HashMap<String,InputTableConfig>();
- configMap.put(table1, tableConfig1);
- configMap.put(table2, tableConfig2);
-
- AccumuloMultiTableInputFormat.setInputTableConfigs(job, configMap);
- AccumuloMultiTableInputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setMapperClass(TestMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormatClass(NullOutputFormat.class);
-
- job.setNumReduceTasks(0);
-
- job.waitForCompletion(true);
-
- return job.isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
- }
- }
-
- /**
- * Generate incrementing counts and attach the table name to each key/value so that ordering and multi-table data can be verified.
- */
- @Test
- public void testMap() throws Exception {
- MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
- Connector c = mockInstance.getConnector("root", new PasswordToken(""));
- c.tableOperations().create(TEST_TABLE_1);
- c.tableOperations().create(TEST_TABLE_2);
- BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
- BatchWriter bw2 = c.createBatchWriter(TEST_TABLE_2, new BatchWriterConfig());
- for (int i = 0; i < 100; i++) {
- Mutation t1m = new Mutation(new Text(String.format("%s_%09x", TEST_TABLE_1, i + 1)));
- t1m.put(new Text(), new Text(), new Value(String.format("%s_%09x", TEST_TABLE_1, i).getBytes()));
- bw.addMutation(t1m);
- Mutation t2m = new Mutation(new Text(String.format("%s_%09x", TEST_TABLE_2, i + 1)));
- t2m.put(new Text(), new Text(), new Value(String.format("%s_%09x", TEST_TABLE_2, i).getBytes()));
- bw2.addMutation(t2m);
- }
- bw.close();
- bw2.close();
-
- MRTester.main(new String[] {"root", "", TEST_TABLE_1, TEST_TABLE_2});
- assertNull(e1);
- assertNull(e2);
- }
-
- /**
- * Verify that {@link InputTableConfig} objects are correctly serialized in the JobContext.
- */
- @Test
- public void testInputTableConfigSerialization() throws IOException {
- @SuppressWarnings("deprecation")
- Job job = new Job();
-
- InputTableConfig tableConfig = new InputTableConfig().setRanges(Collections.singletonList(new Range("a", "b")))
- .fetchColumns(Collections.singleton(new Pair<Text,Text>(new Text("CF1"), new Text("CQ1"))))
- .setIterators(Collections.singletonList(new IteratorSetting(50, "iter1", "iterclass1")));
-
- Map<String,InputTableConfig> configMap = new HashMap<String,InputTableConfig>();
- configMap.put(TEST_TABLE_1, tableConfig);
- configMap.put(TEST_TABLE_2, tableConfig);
-
- AccumuloMultiTableInputFormat.setInputTableConfigs(job, configMap);
-
- assertEquals(tableConfig, AccumuloMultiTableInputFormat.getInputTableConfig(job, TEST_TABLE_1));
- assertEquals(tableConfig, AccumuloMultiTableInputFormat.getInputTableConfig(job, TEST_TABLE_2));
- }
-
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormatTest.java
deleted file mode 100644
index a0cb4e3..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloOutputFormatTest.java
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Map.Entry;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Test;
-
-/**
- * Tests that {@link AccumuloOutputFormat} stores BatchWriter options correctly and writes mutations through a MapReduce job.
- */
-public class AccumuloOutputFormatTest {
- private static AssertionError e1 = null;
- private static final String PREFIX = AccumuloOutputFormatTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
- private static final String TEST_TABLE_1 = PREFIX + "_mapreduce_table_1";
- private static final String TEST_TABLE_2 = PREFIX + "_mapreduce_table_2";
-
- private static class MRTester extends Configured implements Tool {
- private static class TestMapper extends Mapper<Key,Value,Text,Mutation> {
- Key key = null;
- int count = 0;
-
- @Override
- protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
- try {
- if (key != null)
- assertEquals(key.getRow().toString(), new String(v.get()));
- assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
- assertEquals(new String(v.get()), String.format("%09x", count));
- } catch (AssertionError e) {
- e1 = e;
- }
- key = new Key(k);
- count++;
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- Mutation m = new Mutation("total");
- m.put("", "", Integer.toString(count));
- context.write(new Text(), m);
- }
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 4) {
- throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <inputtable> <outputtable>");
- }
-
- String user = args[0];
- String pass = args[1];
- String table1 = args[2];
- String table2 = args[3];
-
- @SuppressWarnings("deprecation")
- Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
- job.setJarByClass(this.getClass());
-
- job.setInputFormatClass(AccumuloInputFormat.class);
-
- AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloInputFormat.setInputTableName(job, table1);
- AccumuloInputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setMapperClass(TestMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormatClass(AccumuloOutputFormat.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Mutation.class);
-
- AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloOutputFormat.setCreateTables(job, false);
- AccumuloOutputFormat.setDefaultTableName(job, table2);
- AccumuloOutputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setNumReduceTasks(0);
-
- job.waitForCompletion(true);
-
- return job.isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
- }
- }
-
- @Test
- public void testBWSettings() throws IOException {
- @SuppressWarnings("deprecation")
- Job job = new Job();
-
- // make sure we aren't testing defaults
- final BatchWriterConfig bwDefaults = new BatchWriterConfig();
- assertNotEquals(7654321l, bwDefaults.getMaxLatency(TimeUnit.MILLISECONDS));
- assertNotEquals(9898989l, bwDefaults.getTimeout(TimeUnit.MILLISECONDS));
- assertNotEquals(42, bwDefaults.getMaxWriteThreads());
- assertNotEquals(1123581321l, bwDefaults.getMaxMemory());
-
- final BatchWriterConfig bwConfig = new BatchWriterConfig();
- bwConfig.setMaxLatency(7654321l, TimeUnit.MILLISECONDS);
- bwConfig.setTimeout(9898989l, TimeUnit.MILLISECONDS);
- bwConfig.setMaxWriteThreads(42);
- bwConfig.setMaxMemory(1123581321l);
- AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
-
- AccumuloOutputFormat myAOF = new AccumuloOutputFormat() {
- @Override
- public void checkOutputSpecs(JobContext job) throws IOException {
- BatchWriterConfig bwOpts = getBatchWriterOptions(job);
-
- // passive check
- assertEquals(bwConfig.getMaxLatency(TimeUnit.MILLISECONDS), bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
- assertEquals(bwConfig.getTimeout(TimeUnit.MILLISECONDS), bwOpts.getTimeout(TimeUnit.MILLISECONDS));
- assertEquals(bwConfig.getMaxWriteThreads(), bwOpts.getMaxWriteThreads());
- assertEquals(bwConfig.getMaxMemory(), bwOpts.getMaxMemory());
-
- // explicit check
- assertEquals(7654321l, bwOpts.getMaxLatency(TimeUnit.MILLISECONDS));
- assertEquals(9898989l, bwOpts.getTimeout(TimeUnit.MILLISECONDS));
- assertEquals(42, bwOpts.getMaxWriteThreads());
- assertEquals(1123581321l, bwOpts.getMaxMemory());
-
- }
- };
- myAOF.checkOutputSpecs(job);
- }
-
- @Test
- public void testMR() throws Exception {
- MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
- Connector c = mockInstance.getConnector("root", new PasswordToken(""));
- c.tableOperations().create(TEST_TABLE_1);
- c.tableOperations().create(TEST_TABLE_2);
- BatchWriter bw = c.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
- for (int i = 0; i < 100; i++) {
- Mutation m = new Mutation(new Text(String.format("%09x", i + 1)));
- m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes()));
- bw.addMutation(m);
- }
- bw.close();
-
- MRTester.main(new String[] {"root", "", TEST_TABLE_1, TEST_TABLE_2});
- assertNull(e1);
-
- Scanner scanner = c.createScanner(TEST_TABLE_2, new Authorizations());
- Iterator<Entry<Key,Value>> iter = scanner.iterator();
- assertTrue(iter.hasNext());
- Entry<Key,Value> entry = iter.next();
- assertEquals(Integer.parseInt(new String(entry.getValue().get())), 100);
- assertFalse(iter.hasNext());
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java
deleted file mode 100644
index 2207437..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloRowInputFormatTest.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.MutationsRejectedException;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.KeyValue;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.ColumnVisibility;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.accumulo.core.util.PeekingIterator;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.junit.Test;
-
-public class AccumuloRowInputFormatTest {
- private static final String PREFIX = AccumuloRowInputFormatTest.class.getSimpleName();
- private static final String INSTANCE_NAME = PREFIX + "_mapreduce_instance";
- private static final String TEST_TABLE_1 = PREFIX + "_mapreduce_table_1";
-
- private static final String ROW1 = "row1";
- private static final String ROW2 = "row2";
- private static final String ROW3 = "row3";
- private static final String COLF1 = "colf1";
- private static List<Entry<Key,Value>> row1;
- private static List<Entry<Key,Value>> row2;
- private static List<Entry<Key,Value>> row3;
- private static AssertionError e1 = null;
- private static AssertionError e2 = null;
-
- public AccumuloRowInputFormatTest() {
- row1 = new ArrayList<Entry<Key,Value>>();
- row1.add(new KeyValue(new Key(ROW1, COLF1, "colq1"), "v1".getBytes()));
- row1.add(new KeyValue(new Key(ROW1, COLF1, "colq2"), "v2".getBytes()));
- row1.add(new KeyValue(new Key(ROW1, "colf2", "colq3"), "v3".getBytes()));
- row2 = new ArrayList<Entry<Key,Value>>();
- row2.add(new KeyValue(new Key(ROW2, COLF1, "colq4"), "v4".getBytes()));
- row3 = new ArrayList<Entry<Key,Value>>();
- row3.add(new KeyValue(new Key(ROW3, COLF1, "colq5"), "v5".getBytes()));
- }
-
- public static void checkLists(final List<Entry<Key,Value>> first, final List<Entry<Key,Value>> second) {
- assertEquals("Sizes should be the same.", first.size(), second.size());
- for (int i = 0; i < first.size(); i++) {
- assertEquals("Keys should be equal.", first.get(i).getKey(), second.get(i).getKey());
- assertEquals("Values should be equal.", first.get(i).getValue(), second.get(i).getValue());
- }
- }
-
- public static void checkLists(final List<Entry<Key,Value>> first, final Iterator<Entry<Key,Value>> second) {
- int entryIndex = 0;
- while (second.hasNext()) {
- final Entry<Key,Value> entry = second.next();
- assertEquals("Keys should be equal", first.get(entryIndex).getKey(), entry.getKey());
- assertEquals("Values should be equal", first.get(entryIndex).getValue(), entry.getValue());
- entryIndex++;
- }
- }
-
- public static void insertList(final BatchWriter writer, final List<Entry<Key,Value>> list) throws MutationsRejectedException {
- for (Entry<Key,Value> e : list) {
- final Key key = e.getKey();
- final Mutation mutation = new Mutation(key.getRow());
- ColumnVisibility colVisibility = new ColumnVisibility(key.getColumnVisibility());
- mutation.put(key.getColumnFamily(), key.getColumnQualifier(), colVisibility, key.getTimestamp(), e.getValue());
- writer.addMutation(mutation);
- }
- }
-
- private static class MRTester extends Configured implements Tool {
- private static class TestMapper extends Mapper<Text,PeekingIterator<Entry<Key,Value>>,Key,Value> {
- int count = 0;
-
- @Override
- protected void map(Text k, PeekingIterator<Entry<Key,Value>> v, Context context) throws IOException, InterruptedException {
- try {
- switch (count) {
- case 0:
- assertEquals("Current key should be " + ROW1, new Text(ROW1), k);
- checkLists(row1, v);
- break;
- case 1:
- assertEquals("Current key should be " + ROW2, new Text(ROW2), k);
- checkLists(row2, v);
- break;
- case 2:
- assertEquals("Current key should be " + ROW3, new Text(ROW3), k);
- checkLists(row3, v);
- break;
- default:
- assertTrue(false);
- }
- } catch (AssertionError e) {
- e1 = e;
- }
- count++;
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- try {
- assertEquals(3, count);
- } catch (AssertionError e) {
- e2 = e;
- }
- }
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- if (args.length != 3) {
- throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table>");
- }
-
- String user = args[0];
- String pass = args[1];
- String table = args[2];
-
- @SuppressWarnings("deprecation")
- Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
- job.setJarByClass(this.getClass());
-
- job.setInputFormatClass(AccumuloRowInputFormat.class);
-
- AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(pass));
- AccumuloInputFormat.setInputTableName(job, table);
- AccumuloRowInputFormat.setMockInstance(job, INSTANCE_NAME);
-
- job.setMapperClass(TestMapper.class);
- job.setMapOutputKeyClass(Key.class);
- job.setMapOutputValueClass(Value.class);
- job.setOutputFormatClass(NullOutputFormat.class);
-
- job.setNumReduceTasks(0);
-
- job.waitForCompletion(true);
-
- return job.isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- assertEquals(0, ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args));
- }
- }
-
- @Test
- public void test() throws Exception {
- final MockInstance instance = new MockInstance(INSTANCE_NAME);
- final Connector conn = instance.getConnector("root", new PasswordToken(""));
- conn.tableOperations().create(TEST_TABLE_1);
- BatchWriter writer = null;
- try {
- writer = conn.createBatchWriter(TEST_TABLE_1, new BatchWriterConfig());
- insertList(writer, row1);
- insertList(writer, row2);
- insertList(writer, row3);
- } finally {
- if (writer != null) {
- writer.close();
- }
- }
- MRTester.main(new String[] {"root", "", TEST_TABLE_1});
- assertNull(e1);
- assertNull(e2);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/BadPasswordSplitsAccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/BadPasswordSplitsAccumuloInputFormat.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/BadPasswordSplitsAccumuloInputFormat.java
deleted file mode 100644
index fce7781..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/BadPasswordSplitsAccumuloInputFormat.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-
-/**
- * AccumuloInputFormat which replaces the token on each RangeInputSplit with an incorrect PasswordToken so that authentication fails.
- */
-public class BadPasswordSplitsAccumuloInputFormat extends AccumuloInputFormat {
-
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException {
- List<InputSplit> splits = super.getSplits(context);
-
- for (InputSplit split : splits) {
- org.apache.accumulo.core.client.mapreduce.RangeInputSplit rangeSplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) split;
- rangeSplit.setToken(new PasswordToken("anythingelse"));
- }
-
- return splits;
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/EmptySplitsAccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/EmptySplitsAccumuloInputFormat.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/EmptySplitsAccumuloInputFormat.java
deleted file mode 100644
index dd531c0..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/EmptySplitsAccumuloInputFormat.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-
-/**
- * AccumuloInputFormat which returns an "empty" RangeInputSplit
- */
-public class EmptySplitsAccumuloInputFormat extends AccumuloInputFormat {
-
- @Override
- public List<InputSplit> getSplits(JobContext context) throws IOException {
- List<InputSplit> oldSplits = super.getSplits(context);
- List<InputSplit> newSplits = new ArrayList<InputSplit>(oldSplits.size());
-
- // Copy only the necessary information
- for (InputSplit oldSplit : oldSplits) {
- org.apache.accumulo.core.client.mapreduce.RangeInputSplit newSplit = new org.apache.accumulo.core.client.mapreduce.RangeInputSplit(
- (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) oldSplit);
- newSplits.add(newSplit);
- }
-
- return newSplits;
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/4dfcb9de/core/src/test/java/org/apache/accumulo/core/client/mapreduce/InputTableConfigTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/InputTableConfigTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/InputTableConfigTest.java
deleted file mode 100644
index 7f5c7d8..0000000
--- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/InputTableConfigTest.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.core.client.mapreduce;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.hadoop.io.Text;
-import org.junit.Before;
-import org.junit.Test;
-
-public class InputTableConfigTest {
-
- private InputTableConfig tableQueryConfig;
-
- @Before
- public void setUp() {
- tableQueryConfig = new InputTableConfig();
- }
-
- @Test
- public void testSerialization_OnlyTable() throws IOException {
- byte[] serialized = serialize(tableQueryConfig);
- InputTableConfig actualConfig = deserialize(serialized);
-
- assertEquals(tableQueryConfig, actualConfig);
- }
-
- @Test
- public void testSerialization_ranges() throws IOException {
- List<Range> ranges = new ArrayList<Range>();
- ranges.add(new Range("a", "b"));
- ranges.add(new Range("c", "d"));
- tableQueryConfig.setRanges(ranges);
-
- byte[] serialized = serialize(tableQueryConfig);
- InputTableConfig actualConfig = deserialize(serialized);
-
- assertEquals(ranges, actualConfig.getRanges());
- }
-
- @Test
- public void testSerialization_columns() throws IOException {
- Set<Pair<Text,Text>> columns = new HashSet<Pair<Text,Text>>();
- columns.add(new Pair<Text,Text>(new Text("cf1"), new Text("cq1")));
- columns.add(new Pair<Text,Text>(new Text("cf2"), null));
- tableQueryConfig.fetchColumns(columns);
-
- byte[] serialized = serialize(tableQueryConfig);
- InputTableConfig actualConfig = deserialize(serialized);
-
- assertEquals(actualConfig.getFetchedColumns(), columns);
- }
-
- @Test
- public void testSerialization_iterators() throws IOException {
- List<IteratorSetting> settings = new ArrayList<IteratorSetting>();
- settings.add(new IteratorSetting(50, "iter", "iterclass"));
- settings.add(new IteratorSetting(55, "iter2", "iterclass2"));
- tableQueryConfig.setIterators(settings);
- byte[] serialized = serialize(tableQueryConfig);
- InputTableConfig actualConfig = deserialize(serialized);
- assertEquals(actualConfig.getIterators(), settings);
-
- }
-
- private byte[] serialize(InputTableConfig tableQueryConfig) throws IOException {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- tableQueryConfig.write(new DataOutputStream(baos));
- baos.close();
- return baos.toByteArray();
- }
-
- private InputTableConfig deserialize(byte[] bytes) throws IOException {
- ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
- InputTableConfig actualConfig = new InputTableConfig(new DataInputStream(bais));
- bais.close();
- return actualConfig;
- }
-}