You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by mw...@apache.org on 2016/12/09 17:16:49 UTC
[3/7] accumulo git commit: ACCUMULO-4511 Removed Accumulo Examples
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
deleted file mode 100644
index fab2532..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.mapreduce;
-
-import java.io.IOException;
-import java.util.Base64;
-import java.util.Collections;
-
-import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.MD5Hash;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import com.beust.jcommander.Parameter;
-
-public class RowHash extends Configured implements Tool {
- /**
- * The Mapper class that given a row number, will generate the appropriate output line.
- */
- public static class HashDataMapper extends Mapper<Key,Value,Text,Mutation> {
- @Override
- public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
- Mutation m = new Mutation(row.getRow());
- m.put(new Text("cf-HASHTYPE"), new Text("cq-MD5BASE64"), new Value(Base64.getEncoder().encode(MD5Hash.digest(data.toString()).getDigest())));
- context.write(null, m);
- context.progress();
- }
-
- @Override
- public void setup(Context job) {}
- }
-
- private static class Opts extends MapReduceClientOnRequiredTable {
- @Parameter(names = "--column", required = true)
- String column;
- }
-
- @Override
- public int run(String[] args) throws Exception {
- Job job = Job.getInstance(getConf());
- job.setJobName(this.getClass().getName());
- job.setJarByClass(this.getClass());
- Opts opts = new Opts();
- opts.parseArgs(RowHash.class.getName(), args);
- job.setInputFormatClass(AccumuloInputFormat.class);
- opts.setAccumuloConfigs(job);
-
- String col = opts.column;
- int idx = col.indexOf(":");
- Text cf = new Text(idx < 0 ? col : col.substring(0, idx));
- Text cq = idx < 0 ? null : new Text(col.substring(idx + 1));
- if (cf.getLength() > 0)
- AccumuloInputFormat.fetchColumns(job, Collections.singleton(new Pair<>(cf, cq)));
-
- job.setMapperClass(HashDataMapper.class);
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(Mutation.class);
-
- job.setNumReduceTasks(0);
-
- job.setOutputFormatClass(AccumuloOutputFormat.class);
-
- job.waitForCompletion(true);
- return job.isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new RowHash(), args);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
deleted file mode 100644
index 96603ad..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.mapreduce;
-
-import java.io.IOException;
-import java.util.AbstractMap.SimpleImmutableEntry;
-import java.util.HashSet;
-import java.util.Map;
-
-import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.accumulo.core.util.format.DefaultFormatter;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import com.beust.jcommander.Parameter;
-
-/**
- * Takes a table and outputs the specified column to a set of part files on hdfs
- * {@code accumulo accumulo.examples.mapreduce.TableToFile <username> <password> <tablename> <column> <hdfs-output-path>}
- */
-public class TableToFile extends Configured implements Tool {
-
- static class Opts extends MapReduceClientOnRequiredTable {
- @Parameter(names = "--output", description = "output directory", required = true)
- String output;
- @Parameter(names = "--columns", description = "columns to extract, in cf:cq{,cf:cq,...} form")
- String columns = "";
- }
-
- /**
- * The Mapper class that given a row number, will generate the appropriate output line.
- */
- public static class TTFMapper extends Mapper<Key,Value,NullWritable,Text> {
- @Override
- public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
- Map.Entry<Key,Value> entry = new SimpleImmutableEntry<>(row, data);
- context.write(NullWritable.get(), new Text(DefaultFormatter.formatEntry(entry, false)));
- context.setStatus("Outputed Value");
- }
- }
-
- @Override
- public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, AccumuloSecurityException {
- Job job = Job.getInstance(getConf());
- job.setJobName(this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
- job.setJarByClass(this.getClass());
- Opts opts = new Opts();
- opts.parseArgs(getClass().getName(), args);
-
- job.setInputFormatClass(AccumuloInputFormat.class);
- opts.setAccumuloConfigs(job);
-
- HashSet<Pair<Text,Text>> columnsToFetch = new HashSet<>();
- for (String col : opts.columns.split(",")) {
- int idx = col.indexOf(":");
- Text cf = new Text(idx < 0 ? col : col.substring(0, idx));
- Text cq = idx < 0 ? null : new Text(col.substring(idx + 1));
- if (cf.getLength() > 0)
- columnsToFetch.add(new Pair<>(cf, cq));
- }
- if (!columnsToFetch.isEmpty())
- AccumuloInputFormat.fetchColumns(job, columnsToFetch);
-
- job.setMapperClass(TTFMapper.class);
- job.setMapOutputKeyClass(NullWritable.class);
- job.setMapOutputValueClass(Text.class);
-
- job.setNumReduceTasks(0);
-
- job.setOutputFormatClass(TextOutputFormat.class);
- TextOutputFormat.setOutputPath(job, new Path(opts.output));
-
- job.waitForCompletion(true);
- return job.isSuccessful() ? 0 : 1;
- }
-
- /**
- *
- * @param args
- * instanceName zookeepers username password table columns outputpath
- */
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new TableToFile(), args);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
deleted file mode 100644
index b0b5177..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.accumulo.examples.simple.mapreduce;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
-import org.apache.accumulo.core.client.BatchWriterConfig;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableUtils;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import com.beust.jcommander.Parameter;
-
-/**
- * Generate the *almost* official terasort input data set. (See below) The user specifies the number of rows and the output directory and this class runs a
- * map/reduce program to generate the data. The format of the data is:
- * <ul>
- * <li>(10 bytes key) (10 bytes rowid) (78 bytes filler) \r \n
- * <li>The keys are random characters from the set ' ' .. '~'.
- * <li>The rowid is the right justified row id as a int.
- * <li>The filler consists of 7 runs of 10 characters from 'A' to 'Z'.
- * </ul>
- *
- * This TeraSort is slightly modified to allow for variable length key sizes and value sizes. The row length isn't variable. To generate a terabyte of data in
- * the same way TeraSort does use 10000000000 rows and 10/10 byte key length and 78/78 byte value length. Along with the 10 byte row id and \r\n this gives you
- * 100 byte row * 10000000000 rows = 1tb. Min/Max ranges for key and value parameters are inclusive/inclusive respectively.
- *
- *
- */
-public class TeraSortIngest extends Configured implements Tool {
- /**
- * An input format that assigns ranges of longs to each mapper.
- */
- static class RangeInputFormat extends InputFormat<LongWritable,NullWritable> {
- /**
- * An input split consisting of a range on numbers.
- */
- static class RangeInputSplit extends InputSplit implements Writable {
- long firstRow;
- long rowCount;
-
- public RangeInputSplit() {}
-
- public RangeInputSplit(long offset, long length) {
- firstRow = offset;
- rowCount = length;
- }
-
- @Override
- public long getLength() throws IOException {
- return 0;
- }
-
- @Override
- public String[] getLocations() throws IOException {
- return new String[] {};
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- firstRow = WritableUtils.readVLong(in);
- rowCount = WritableUtils.readVLong(in);
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- WritableUtils.writeVLong(out, firstRow);
- WritableUtils.writeVLong(out, rowCount);
- }
- }
-
- /**
- * A record reader that will generate a range of numbers.
- */
- static class RangeRecordReader extends RecordReader<LongWritable,NullWritable> {
- long startRow;
- long finishedRows;
- long totalRows;
-
- public RangeRecordReader(RangeInputSplit split) {
- startRow = split.firstRow;
- finishedRows = 0;
- totalRows = split.rowCount;
- }
-
- @Override
- public void close() throws IOException {}
-
- @Override
- public float getProgress() throws IOException {
- return finishedRows / (float) totalRows;
- }
-
- @Override
- public LongWritable getCurrentKey() throws IOException, InterruptedException {
- return new LongWritable(startRow + finishedRows);
- }
-
- @Override
- public NullWritable getCurrentValue() throws IOException, InterruptedException {
- return NullWritable.get();
- }
-
- @Override
- public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {}
-
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- if (finishedRows < totalRows) {
- ++finishedRows;
- return true;
- }
- return false;
- }
- }
-
- @Override
- public RecordReader<LongWritable,NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
- // reporter.setStatus("Creating record reader");
- return new RangeRecordReader((RangeInputSplit) split);
- }
-
- /**
- * Create the desired number of splits, dividing the number of rows between the mappers.
- */
- @Override
- public List<InputSplit> getSplits(JobContext job) {
- long totalRows = job.getConfiguration().getLong(NUMROWS, 0);
- int numSplits = job.getConfiguration().getInt(NUMSPLITS, 1);
- long rowsPerSplit = totalRows / numSplits;
- System.out.println("Generating " + totalRows + " using " + numSplits + " maps with step of " + rowsPerSplit);
- ArrayList<InputSplit> splits = new ArrayList<>(numSplits);
- long currentRow = 0;
- for (int split = 0; split < numSplits - 1; ++split) {
- splits.add(new RangeInputSplit(currentRow, rowsPerSplit));
- currentRow += rowsPerSplit;
- }
- splits.add(new RangeInputSplit(currentRow, totalRows - currentRow));
- System.out.println("Done Generating.");
- return splits;
- }
-
- }
-
- private static String NUMSPLITS = "terasort.overridesplits";
- private static String NUMROWS = "terasort.numrows";
-
- static class RandomGenerator {
- private long seed = 0;
- private static final long mask32 = (1l << 32) - 1;
- /**
- * The number of iterations separating the precomputed seeds.
- */
- private static final int seedSkip = 128 * 1024 * 1024;
- /**
- * The precomputed seed values after every seedSkip iterations. There should be enough values so that a 2**32 iterations are covered.
- */
- private static final long[] seeds = new long[] {0L, 4160749568L, 4026531840L, 3892314112L, 3758096384L, 3623878656L, 3489660928L, 3355443200L, 3221225472L,
- 3087007744L, 2952790016L, 2818572288L, 2684354560L, 2550136832L, 2415919104L, 2281701376L, 2147483648L, 2013265920L, 1879048192L, 1744830464L,
- 1610612736L, 1476395008L, 1342177280L, 1207959552L, 1073741824L, 939524096L, 805306368L, 671088640L, 536870912L, 402653184L, 268435456L, 134217728L,};
-
- /**
- * Start the random number generator on the given iteration.
- *
- * @param initalIteration
- * the iteration number to start on
- */
- RandomGenerator(long initalIteration) {
- int baseIndex = (int) ((initalIteration & mask32) / seedSkip);
- seed = seeds[baseIndex];
- for (int i = 0; i < initalIteration % seedSkip; ++i) {
- next();
- }
- }
-
- RandomGenerator() {
- this(0);
- }
-
- long next() {
- seed = (seed * 3141592621l + 663896637) & mask32;
- return seed;
- }
- }
-
- /**
- * The Mapper class that given a row number, will generate the appropriate output line.
- */
- public static class SortGenMapper extends Mapper<LongWritable,NullWritable,Text,Mutation> {
- private Text tableName = null;
- private int minkeylength = 0;
- private int maxkeylength = 0;
- private int minvaluelength = 0;
- private int maxvaluelength = 0;
-
- private Text key = new Text();
- private Text value = new Text();
- private RandomGenerator rand;
- private byte[] keyBytes; // = new byte[12];
- private byte[] spaces = " ".getBytes();
- private byte[][] filler = new byte[26][];
- {
- for (int i = 0; i < 26; ++i) {
- filler[i] = new byte[10];
- for (int j = 0; j < 10; ++j) {
- filler[i][j] = (byte) ('A' + i);
- }
- }
- }
-
- /**
- * Add a random key to the text
- */
- private Random random = new Random();
-
- private void addKey() {
- int range = random.nextInt(maxkeylength - minkeylength + 1);
- int keylen = range + minkeylength;
- int keyceil = keylen + (4 - (keylen % 4));
- keyBytes = new byte[keyceil];
-
- long temp = 0;
- for (int i = 0; i < keyceil / 4; i++) {
- temp = rand.next() / 52;
- keyBytes[3 + 4 * i] = (byte) (' ' + (temp % 95));
- temp /= 95;
- keyBytes[2 + 4 * i] = (byte) (' ' + (temp % 95));
- temp /= 95;
- keyBytes[1 + 4 * i] = (byte) (' ' + (temp % 95));
- temp /= 95;
- keyBytes[4 * i] = (byte) (' ' + (temp % 95));
- }
- key.set(keyBytes, 0, keylen);
- }
-
- /**
- * Add the rowid to the row.
- */
- private Text getRowIdString(long rowId) {
- Text paddedRowIdString = new Text();
- byte[] rowid = Integer.toString((int) rowId).getBytes();
- int padSpace = 10 - rowid.length;
- if (padSpace > 0) {
- paddedRowIdString.append(spaces, 0, 10 - rowid.length);
- }
- paddedRowIdString.append(rowid, 0, Math.min(rowid.length, 10));
- return paddedRowIdString;
- }
-
- /**
- * Add the required filler bytes. Each row consists of 7 blocks of 10 characters and 1 block of 8 characters.
- *
- * @param rowId
- * the current row number
- */
- private void addFiller(long rowId) {
- int base = (int) ((rowId * 8) % 26);
-
- // Get Random var
- Random random = new Random(rand.seed);
-
- int range = random.nextInt(maxvaluelength - minvaluelength + 1);
- int valuelen = range + minvaluelength;
-
- while (valuelen > 10) {
- value.append(filler[(base + valuelen) % 26], 0, 10);
- valuelen -= 10;
- }
-
- if (valuelen > 0)
- value.append(filler[(base + valuelen) % 26], 0, valuelen);
- }
-
- @Override
- public void map(LongWritable row, NullWritable ignored, Context context) throws IOException, InterruptedException {
- context.setStatus("Entering");
- long rowId = row.get();
- if (rand == null) {
- // we use 3 random numbers per a row
- rand = new RandomGenerator(rowId * 3);
- }
- addKey();
- value.clear();
- // addRowId(rowId);
- addFiller(rowId);
-
- // New
- Mutation m = new Mutation(key);
- m.put(new Text("c"), // column family
- getRowIdString(rowId), // column qual
- new Value(value.toString().getBytes())); // data
-
- context.setStatus("About to add to accumulo");
- context.write(tableName, m);
- context.setStatus("Added to accumulo " + key.toString());
- }
-
- @Override
- public void setup(Context job) {
- minkeylength = job.getConfiguration().getInt("cloudgen.minkeylength", 0);
- maxkeylength = job.getConfiguration().getInt("cloudgen.maxkeylength", 0);
- minvaluelength = job.getConfiguration().getInt("cloudgen.minvaluelength", 0);
- maxvaluelength = job.getConfiguration().getInt("cloudgen.maxvaluelength", 0);
- tableName = new Text(job.getConfiguration().get("cloudgen.tablename"));
- }
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new TeraSortIngest(), args);
- }
-
- static class Opts extends MapReduceClientOnRequiredTable {
- @Parameter(names = "--count", description = "number of rows to ingest", required = true)
- long numRows;
- @Parameter(names = {"-nk", "--minKeySize"}, description = "miniumum key size", required = true)
- int minKeyLength;
- @Parameter(names = {"-xk", "--maxKeySize"}, description = "maximum key size", required = true)
- int maxKeyLength;
- @Parameter(names = {"-nv", "--minValueSize"}, description = "minimum key size", required = true)
- int minValueLength;
- @Parameter(names = {"-xv", "--maxValueSize"}, description = "maximum key size", required = true)
- int maxValueLength;
- @Parameter(names = "--splits", description = "number of splits to create in the table")
- int splits = 0;
- }
-
- @Override
- public int run(String[] args) throws Exception {
- Job job = Job.getInstance(getConf());
- job.setJobName("TeraSortCloud");
- job.setJarByClass(this.getClass());
- Opts opts = new Opts();
- opts.parseArgs(TeraSortIngest.class.getName(), args);
-
- job.setInputFormatClass(RangeInputFormat.class);
- job.setMapperClass(SortGenMapper.class);
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(Mutation.class);
-
- job.setNumReduceTasks(0);
-
- job.setOutputFormatClass(AccumuloOutputFormat.class);
- opts.setAccumuloConfigs(job);
- BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
- AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
-
- Configuration conf = job.getConfiguration();
- conf.setLong(NUMROWS, opts.numRows);
- conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
- conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
- conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
- conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
- conf.set("cloudgen.tablename", opts.getTableName());
-
- if (args.length > 10)
- conf.setInt(NUMSPLITS, opts.splits);
-
- job.waitForCompletion(true);
- return job.isSuccessful() ? 0 : 1;
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java
deleted file mode 100644
index 74c40a5..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * A simple map reduce job that inserts word counts into accumulo. See the README for instructions on how to run this. This version does not use the ClientOpts
- * class to parse arguments as an example of using AccumuloInputFormat and AccumuloOutputFormat directly. See README.mapred for more details.
- *
- */
-public class TokenFileWordCount extends Configured implements Tool {
-
- private static final Logger log = LoggerFactory.getLogger(TokenFileWordCount.class);
-
- public static class MapClass extends Mapper<LongWritable,Text,Text,Mutation> {
- @Override
- public void map(LongWritable key, Text value, Context output) throws IOException {
- String[] words = value.toString().split("\\s+");
-
- for (String word : words) {
-
- Mutation mutation = new Mutation(new Text(word));
- mutation.put(new Text("count"), new Text("20080906"), new Value("1".getBytes()));
-
- try {
- output.write(null, mutation);
- } catch (InterruptedException e) {
- log.error("Could not write to Context.", e);
- }
- }
- }
- }
-
- @Override
- public int run(String[] args) throws Exception {
-
- String instance = args[0];
- String zookeepers = args[1];
- String user = args[2];
- String tokenFile = args[3];
- String input = args[4];
- String tableName = args[5];
-
- Job job = Job.getInstance(getConf());
- job.setJobName(TokenFileWordCount.class.getName());
- job.setJarByClass(this.getClass());
-
- job.setInputFormatClass(TextInputFormat.class);
- TextInputFormat.setInputPaths(job, input);
-
- job.setMapperClass(MapClass.class);
-
- job.setNumReduceTasks(0);
-
- job.setOutputFormatClass(AccumuloOutputFormat.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Mutation.class);
-
- // AccumuloInputFormat not used here, but it uses the same functions.
- AccumuloOutputFormat.setZooKeeperInstance(job, ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
- AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
- AccumuloOutputFormat.setCreateTables(job, true);
- AccumuloOutputFormat.setDefaultTableName(job, tableName);
-
- job.waitForCompletion(true);
- return 0;
- }
-
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new TokenFileWordCount(), args);
- System.exit(res);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/UniqueColumns.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/UniqueColumns.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/UniqueColumns.java
deleted file mode 100644
index 9cdf8d0..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/UniqueColumns.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.mapreduce;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.HashSet;
-
-import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
-import org.apache.accumulo.core.data.ByteSequence;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import com.beust.jcommander.Parameter;
-
-/**
- * A simple map reduce job that computes the unique column families and column qualifiers in a table. This example shows one way to run against an offline
- * table.
- */
-public class UniqueColumns extends Configured implements Tool {
-
- private static final Text EMPTY = new Text();
-
- public static class UMapper extends Mapper<Key,Value,Text,Text> {
- private Text temp = new Text();
- private static final Text CF = new Text("cf:");
- private static final Text CQ = new Text("cq:");
-
- @Override
- public void map(Key key, Value value, Context context) throws IOException, InterruptedException {
- temp.set(CF);
- ByteSequence cf = key.getColumnFamilyData();
- temp.append(cf.getBackingArray(), cf.offset(), cf.length());
- context.write(temp, EMPTY);
-
- temp.set(CQ);
- ByteSequence cq = key.getColumnQualifierData();
- temp.append(cq.getBackingArray(), cq.offset(), cq.length());
- context.write(temp, EMPTY);
- }
- }
-
- public static class UReducer extends Reducer<Text,Text,Text,Text> {
- @Override
- public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
- context.write(key, EMPTY);
- }
- }
-
- static class Opts extends MapReduceClientOnRequiredTable {
- @Parameter(names = "--output", description = "output directory")
- String output;
- @Parameter(names = "--reducers", description = "number of reducers to use", required = true)
- int reducers;
- @Parameter(names = "--offline", description = "run against an offline table")
- boolean offline = false;
- }
-
- @Override
- public int run(String[] args) throws Exception {
- Opts opts = new Opts();
- opts.parseArgs(UniqueColumns.class.getName(), args);
-
- String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();
-
- Job job = Job.getInstance(getConf());
- job.setJobName(jobName);
- job.setJarByClass(this.getClass());
-
- String clone = opts.getTableName();
- Connector conn = null;
-
- opts.setAccumuloConfigs(job);
-
- if (opts.offline) {
- /*
- * this example clones the table and takes it offline. If you plan to run map reduce jobs over a table many times, it may be more efficient to compact the
- * table, clone it, and then keep using the same clone as input for map reduce.
- */
-
- conn = opts.getConnector();
- clone = opts.getTableName() + "_" + jobName;
- conn.tableOperations().clone(opts.getTableName(), clone, true, new HashMap<String,String>(), new HashSet<String>());
- conn.tableOperations().offline(clone);
-
- AccumuloInputFormat.setOfflineTableScan(job, true);
- AccumuloInputFormat.setInputTableName(job, clone);
- }
-
- job.setInputFormatClass(AccumuloInputFormat.class);
-
- job.setMapperClass(UMapper.class);
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(Text.class);
-
- job.setCombinerClass(UReducer.class);
- job.setReducerClass(UReducer.class);
-
- job.setNumReduceTasks(opts.reducers);
-
- job.setOutputFormatClass(TextOutputFormat.class);
- TextOutputFormat.setOutputPath(job, new Path(opts.output));
-
- job.waitForCompletion(true);
-
- if (opts.offline) {
- conn.tableOperations().delete(clone);
- }
-
- return job.isSuccessful() ? 0 : 1;
- }
-
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new UniqueColumns(), args);
- System.exit(res);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
deleted file mode 100644
index 604d05d..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.mapreduce;
-
-import java.io.IOException;
-
-import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.beust.jcommander.Parameter;
-
-/**
- * A simple map reduce job that inserts word counts into accumulo. See the README for instructions on how to run this.
- *
- */
-public class WordCount extends Configured implements Tool {
-
- private static final Logger log = LoggerFactory.getLogger(WordCount.class);
-
- static class Opts extends MapReduceClientOnRequiredTable {
- @Parameter(names = "--input", description = "input directory")
- String inputDirectory;
- }
-
- public static class MapClass extends Mapper<LongWritable,Text,Text,Mutation> {
- @Override
- public void map(LongWritable key, Text value, Context output) throws IOException {
- String[] words = value.toString().split("\\s+");
-
- for (String word : words) {
-
- Mutation mutation = new Mutation(new Text(word));
- mutation.put(new Text("count"), new Text("20080906"), new Value("1".getBytes()));
-
- try {
- output.write(null, mutation);
- } catch (InterruptedException e) {
- log.error("Could not write mutation to Context.", e);
- }
- }
- }
- }
-
- @Override
- public int run(String[] args) throws Exception {
- Opts opts = new Opts();
- opts.parseArgs(WordCount.class.getName(), args);
-
- Job job = Job.getInstance(getConf());
- job.setJobName(WordCount.class.getName());
- job.setJarByClass(this.getClass());
-
- job.setInputFormatClass(TextInputFormat.class);
- TextInputFormat.setInputPaths(job, new Path(opts.inputDirectory));
-
- job.setMapperClass(MapClass.class);
-
- job.setNumReduceTasks(0);
-
- job.setOutputFormatClass(AccumuloOutputFormat.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Mutation.class);
- opts.setAccumuloConfigs(job);
- job.waitForCompletion(true);
- return 0;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new WordCount(), args);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
deleted file mode 100644
index 42ec5ea..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.mapreduce.bulk;
-
-import java.io.BufferedOutputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Base64;
-import java.util.Collection;
-
-import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
-import org.apache.accumulo.core.client.mapreduce.lib.partition.RangePartitioner;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.util.TextUtil;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FsShell;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import com.beust.jcommander.Parameter;
-
-/**
- * Example map reduce job that bulk ingest data into an accumulo table. The expected input is text files containing tab separated key value pairs on each line.
- */
-public class BulkIngestExample extends Configured implements Tool {
- public static class MapClass extends Mapper<LongWritable,Text,Text,Text> {
- private Text outputKey = new Text();
- private Text outputValue = new Text();
-
- @Override
- public void map(LongWritable key, Text value, Context output) throws IOException, InterruptedException {
- // split on tab
- int index = -1;
- for (int i = 0; i < value.getLength(); i++) {
- if (value.getBytes()[i] == '\t') {
- index = i;
- break;
- }
- }
-
- if (index > 0) {
- outputKey.set(value.getBytes(), 0, index);
- outputValue.set(value.getBytes(), index + 1, value.getLength() - (index + 1));
- output.write(outputKey, outputValue);
- }
- }
- }
-
- public static class ReduceClass extends Reducer<Text,Text,Key,Value> {
- @Override
- public void reduce(Text key, Iterable<Text> values, Context output) throws IOException, InterruptedException {
- // be careful with the timestamp... if you run on a cluster
- // where the time is whacked you may not see your updates in
- // accumulo if there is already an existing value with a later
- // timestamp in accumulo... so make sure ntp is running on the
- // cluster or consider using logical time... one options is
- // to let accumulo set the time
- long timestamp = System.currentTimeMillis();
-
- int index = 0;
- for (Text value : values) {
- Key outputKey = new Key(key, new Text("colf"), new Text(String.format("col_%07d", index)), timestamp);
- index++;
-
- Value outputValue = new Value(value.getBytes(), 0, value.getLength());
- output.write(outputKey, outputValue);
- }
- }
- }
-
- static class Opts extends MapReduceClientOnRequiredTable {
- @Parameter(names = "--inputDir", required = true)
- String inputDir;
- @Parameter(names = "--workDir", required = true)
- String workDir;
- }
-
- @Override
- public int run(String[] args) {
- Opts opts = new Opts();
- opts.parseArgs(BulkIngestExample.class.getName(), args);
-
- Configuration conf = getConf();
- PrintStream out = null;
- try {
- Job job = Job.getInstance(conf);
- job.setJobName("bulk ingest example");
- job.setJarByClass(this.getClass());
-
- job.setInputFormatClass(TextInputFormat.class);
-
- job.setMapperClass(MapClass.class);
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(Text.class);
-
- job.setReducerClass(ReduceClass.class);
- job.setOutputFormatClass(AccumuloFileOutputFormat.class);
- opts.setAccumuloConfigs(job);
-
- Connector connector = opts.getConnector();
-
- TextInputFormat.setInputPaths(job, new Path(opts.inputDir));
- AccumuloFileOutputFormat.setOutputPath(job, new Path(opts.workDir + "/files"));
-
- FileSystem fs = FileSystem.get(conf);
- out = new PrintStream(new BufferedOutputStream(fs.create(new Path(opts.workDir + "/splits.txt"))));
-
- Collection<Text> splits = connector.tableOperations().listSplits(opts.getTableName(), 100);
- for (Text split : splits)
- out.println(Base64.getEncoder().encodeToString(TextUtil.getBytes(split)));
-
- job.setNumReduceTasks(splits.size() + 1);
- out.close();
-
- job.setPartitionerClass(RangePartitioner.class);
- RangePartitioner.setSplitFile(job, opts.workDir + "/splits.txt");
-
- job.waitForCompletion(true);
- Path failures = new Path(opts.workDir, "failures");
- fs.delete(failures, true);
- fs.mkdirs(new Path(opts.workDir, "failures"));
- // With HDFS permissions on, we need to make sure the Accumulo user can read/move the rfiles
- FsShell fsShell = new FsShell(conf);
- fsShell.run(new String[] {"-chmod", "-R", "777", opts.workDir});
- connector.tableOperations().importDirectory(opts.getTableName(), opts.workDir + "/files", opts.workDir + "/failures", false);
-
- } catch (Exception e) {
- throw new RuntimeException(e);
- } finally {
- if (out != null)
- out.close();
- }
-
- return 0;
- }
-
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new BulkIngestExample(), args);
- System.exit(res);
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java
deleted file mode 100644
index 5cb4a0b..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.mapreduce.bulk;
-
-import java.io.BufferedOutputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import com.beust.jcommander.Parameter;
-
-public class GenerateTestData {
-
- static class Opts extends org.apache.accumulo.core.cli.Help {
- @Parameter(names = "--start-row", required = true)
- int startRow = 0;
- @Parameter(names = "--count", required = true)
- int numRows = 0;
- @Parameter(names = "--output", required = true)
- String outputFile;
- }
-
- public static void main(String[] args) throws IOException {
- Opts opts = new Opts();
- opts.parseArgs(GenerateTestData.class.getName(), args);
-
- FileSystem fs = FileSystem.get(new Configuration());
- PrintStream out = new PrintStream(new BufferedOutputStream(fs.create(new Path(opts.outputFile))));
-
- for (int i = 0; i < opts.numRows; i++) {
- out.println(String.format("row_%010d\tvalue_%010d", i + opts.startRow, i + opts.startRow));
- }
- out.close();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
deleted file mode 100644
index 0fc3110..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.mapreduce.bulk;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.TreeSet;
-
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.hadoop.io.Text;
-
-import com.beust.jcommander.Parameter;
-
-public class SetupTable {
-
- static class Opts extends ClientOnRequiredTable {
- @Parameter(description = "<split> { <split> ... } ")
- List<String> splits = new ArrayList<>();
- }
-
- public static void main(String[] args) throws Exception {
- Opts opts = new Opts();
- opts.parseArgs(SetupTable.class.getName(), args);
- Connector conn = opts.getConnector();
- conn.tableOperations().create(opts.getTableName());
- if (!opts.splits.isEmpty()) {
- // create a table with initial partitions
- TreeSet<Text> intialPartitions = new TreeSet<>();
- for (String split : opts.splits) {
- intialPartitions.add(new Text(split));
- }
- conn.tableOperations().addSplits(opts.getTableName(), intialPartitions);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
deleted file mode 100644
index 16530cc..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.mapreduce.bulk;
-
-import java.util.Iterator;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.cli.ClientOnRequiredTable;
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.hadoop.io.Text;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.beust.jcommander.Parameter;
-
-public class VerifyIngest {
- private static final Logger log = LoggerFactory.getLogger(VerifyIngest.class);
-
- static class Opts extends ClientOnRequiredTable {
- @Parameter(names = "--start-row")
- int startRow = 0;
- @Parameter(names = "--count", required = true, description = "number of rows to verify")
- int numRows = 0;
- }
-
- public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
- Opts opts = new Opts();
- opts.parseArgs(VerifyIngest.class.getName(), args);
-
- Connector connector = opts.getConnector();
- Scanner scanner = connector.createScanner(opts.getTableName(), opts.auths);
-
- scanner.setRange(new Range(new Text(String.format("row_%010d", opts.startRow)), null));
-
- Iterator<Entry<Key,Value>> si = scanner.iterator();
-
- boolean ok = true;
-
- for (int i = opts.startRow; i < opts.numRows; i++) {
-
- if (si.hasNext()) {
- Entry<Key,Value> entry = si.next();
-
- if (!entry.getKey().getRow().toString().equals(String.format("row_%010d", i))) {
- log.error("unexpected row key " + entry.getKey().getRow().toString() + " expected " + String.format("row_%010d", i));
- ok = false;
- }
-
- if (!entry.getValue().toString().equals(String.format("value_%010d", i))) {
- log.error("unexpected value " + entry.getValue().toString() + " expected " + String.format("value_%010d", i));
- ok = false;
- }
-
- } else {
- log.error("no more rows, expected " + String.format("row_%010d", i));
- ok = false;
- break;
- }
-
- }
-
- if (ok) {
- System.out.println("OK");
- System.exit(0);
- } else {
- System.exit(1);
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/reservations/ARS.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/reservations/ARS.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/reservations/ARS.java
deleted file mode 100644
index eff8e21..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/reservations/ARS.java
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.reservations;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.ConditionalWriter;
-import org.apache.accumulo.core.client.ConditionalWriter.Status;
-import org.apache.accumulo.core.client.ConditionalWriterConfig;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.IsolatedScanner;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.ZooKeeperInstance;
-import org.apache.accumulo.core.client.security.tokens.PasswordToken;
-import org.apache.accumulo.core.data.Condition;
-import org.apache.accumulo.core.data.ConditionalMutation;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.hadoop.io.Text;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import jline.console.ConsoleReader;
-
-/**
- * Accumulo Reservation System : An example reservation system using Accumulo. Supports atomic reservations of a resource at a date. Wait list are also
- * supported. In order to keep the example simple, no checking is done of the date. Also the code is inefficient, if interested in improving it take a look at
- * the EXCERCISE comments.
- */
-
-// EXCERCISE create a test that verifies correctness under concurrency. For example, have M threads making reservations against N resources. Each thread could
-// randomly reserve and cancel resources for a single user. When each thread finishes, it knows what the state of its single user should be. When all threads
-// finish, collect their expected state and verify the status of all users and resources. For extra credit run the test on a IAAS provider using 10 nodes and
-// 10 threads per node.
-
-public class ARS {
-
- private static final Logger log = LoggerFactory.getLogger(ARS.class);
-
- private Connector conn;
- private String rTable;
-
- public enum ReservationResult {
- RESERVED, WAIT_LISTED
- }
-
- public ARS(Connector conn, String rTable) {
- this.conn = conn;
- this.rTable = rTable;
- }
-
- public List<String> setCapacity(String what, String when, int count) {
- // EXCERCISE implement this method which atomically sets a capacity and returns anyone who was moved to the wait list if the capacity was decreased
-
- throw new UnsupportedOperationException();
- }
-
- public ReservationResult reserve(String what, String when, String who) throws Exception {
-
- String row = what + ":" + when;
-
- // EXCERCISE This code assumes there is no reservation and tries to create one. If a reservation exist then the update will fail. This is a good strategy
- // when it is expected there are usually no reservations. Could modify the code to scan first.
-
- // The following mutation requires that the column tx:seq does not exist and will fail if it does.
- ConditionalMutation update = new ConditionalMutation(row, new Condition("tx", "seq"));
- update.put("tx", "seq", "0");
- update.put("res", String.format("%04d", 0), who);
-
- ReservationResult result = ReservationResult.RESERVED;
-
- // it is important to use an isolated scanner so that only whole mutations are seen
- try (ConditionalWriter cwriter = conn.createConditionalWriter(rTable, new ConditionalWriterConfig());
- Scanner scanner = new IsolatedScanner(conn.createScanner(rTable, Authorizations.EMPTY))) {
- while (true) {
- Status status = cwriter.write(update).getStatus();
- switch (status) {
- case ACCEPTED:
- return result;
- case REJECTED:
- case UNKNOWN:
- // read the row and decide what to do
- break;
- default:
- throw new RuntimeException("Unexpected status " + status);
- }
-
- // EXCERCISE in the case of many threads trying to reserve a slot, this approach of immediately retrying is inefficient. Exponential back-off is good
- // general solution to solve contention problems like this. However in this particular case, exponential back-off could penalize the earliest threads
- // that attempted to make a reservation by putting them later in the list. A more complex solution could involve having independent sub-queues within
- // the row that approximately maintain arrival order and use exponential back off to fairly merge the sub-queues into the main queue.
-
- scanner.setRange(new Range(row));
-
- int seq = -1;
- int maxReservation = -1;
-
- for (Entry<Key,Value> entry : scanner) {
- String cf = entry.getKey().getColumnFamilyData().toString();
- String cq = entry.getKey().getColumnQualifierData().toString();
- String val = entry.getValue().toString();
-
- if (cf.equals("tx") && cq.equals("seq")) {
- seq = Integer.parseInt(val);
- } else if (cf.equals("res")) {
- // EXCERCISE scanning the entire list to find if reserver is already in the list is inefficient. One possible way to solve this would be to sort the
- // data differently in Accumulo so that finding the reserver could be done quickly.
- if (val.equals(who))
- if (maxReservation == -1)
- return ReservationResult.RESERVED; // already have the first reservation
- else
- return ReservationResult.WAIT_LISTED; // already on wait list
-
- // EXCERCISE the way this code finds the max reservation is very inefficient.... it would be better if it did not have to scan the entire row.
- // One possibility is to just use the sequence number. Could also consider sorting the data in another way and/or using an iterator.
- maxReservation = Integer.parseInt(cq);
- }
- }
-
- Condition condition = new Condition("tx", "seq");
- if (seq >= 0)
- condition.setValue(seq + ""); // only expect a seq # if one was seen
-
- update = new ConditionalMutation(row, condition);
- update.put("tx", "seq", (seq + 1) + "");
- update.put("res", String.format("%04d", maxReservation + 1), who);
-
- // EXCERCISE if set capacity is implemented, then result should take capacity into account
- if (maxReservation == -1)
- result = ReservationResult.RESERVED; // if successful, will be first reservation
- else
- result = ReservationResult.WAIT_LISTED;
- }
- }
- }
-
- public void cancel(String what, String when, String who) throws Exception {
-
- String row = what + ":" + when;
-
- // Even though this method is only deleting a column, its important to use a conditional writer. By updating the seq # when deleting a reservation, it
- // will cause any concurrent reservations to retry. If this delete were done using a batch writer, then a concurrent reservation could report WAIT_LISTED
- // when it actually got the reservation.
-
- // its important to use an isolated scanner so that only whole mutations are seen
- try (ConditionalWriter cwriter = conn.createConditionalWriter(rTable, new ConditionalWriterConfig());
- Scanner scanner = new IsolatedScanner(conn.createScanner(rTable, Authorizations.EMPTY))) {
- while (true) {
- scanner.setRange(new Range(row));
-
- int seq = -1;
- String reservation = null;
-
- for (Entry<Key,Value> entry : scanner) {
- String cf = entry.getKey().getColumnFamilyData().toString();
- String cq = entry.getKey().getColumnQualifierData().toString();
- String val = entry.getValue().toString();
-
- // EXCERCISE avoid linear scan
-
- if (cf.equals("tx") && cq.equals("seq")) {
- seq = Integer.parseInt(val);
- } else if (cf.equals("res") && val.equals(who)) {
- reservation = cq;
- }
- }
-
- if (reservation != null) {
- ConditionalMutation update = new ConditionalMutation(row, new Condition("tx", "seq").setValue(seq + ""));
- update.putDelete("res", reservation);
- update.put("tx", "seq", (seq + 1) + "");
-
- Status status = cwriter.write(update).getStatus();
- switch (status) {
- case ACCEPTED:
- // successfully canceled reservation
- return;
- case REJECTED:
- case UNKNOWN:
- // retry
- // EXCERCISE exponential back-off could be used here
- break;
- default:
- throw new RuntimeException("Unexpected status " + status);
- }
-
- } else {
- // not reserved, nothing to do
- break;
- }
-
- }
- }
- }
-
- public List<String> list(String what, String when) throws Exception {
- String row = what + ":" + when;
-
- // its important to use an isolated scanner so that only whole mutations are seen
- try (Scanner scanner = new IsolatedScanner(conn.createScanner(rTable, Authorizations.EMPTY))) {
- scanner.setRange(new Range(row));
- scanner.fetchColumnFamily(new Text("res"));
-
- List<String> reservations = new ArrayList<>();
-
- for (Entry<Key,Value> entry : scanner) {
- String val = entry.getValue().toString();
- reservations.add(val);
- }
-
- return reservations;
- }
- }
-
- public static void main(String[] args) throws Exception {
- final ConsoleReader reader = new ConsoleReader();
- ARS ars = null;
-
- while (true) {
- String line = reader.readLine(">");
- if (line == null)
- break;
-
- final String[] tokens = line.split("\\s+");
-
- if (tokens[0].equals("reserve") && tokens.length >= 4 && ars != null) {
- // start up multiple threads all trying to reserve the same resource, no more than one should succeed
-
- final ARS fars = ars;
- ArrayList<Thread> threads = new ArrayList<>();
- for (int i = 3; i < tokens.length; i++) {
- final int whoIndex = i;
- Runnable reservationTask = new Runnable() {
- @Override
- public void run() {
- try {
- reader.println(" " + String.format("%20s", tokens[whoIndex]) + " : " + fars.reserve(tokens[1], tokens[2], tokens[whoIndex]));
- } catch (Exception e) {
- log.warn("Could not write to the ConsoleReader.", e);
- }
- }
- };
-
- threads.add(new Thread(reservationTask));
- }
-
- for (Thread thread : threads)
- thread.start();
-
- for (Thread thread : threads)
- thread.join();
-
- } else if (tokens[0].equals("cancel") && tokens.length == 4 && ars != null) {
- ars.cancel(tokens[1], tokens[2], tokens[3]);
- } else if (tokens[0].equals("list") && tokens.length == 3 && ars != null) {
- List<String> reservations = ars.list(tokens[1], tokens[2]);
- if (reservations.size() > 0) {
- reader.println(" Reservation holder : " + reservations.get(0));
- if (reservations.size() > 1)
- reader.println(" Wait list : " + reservations.subList(1, reservations.size()));
- }
- } else if (tokens[0].equals("quit") && tokens.length == 1) {
- break;
- } else if (tokens[0].equals("connect") && tokens.length == 6 && ars == null) {
- ZooKeeperInstance zki = new ZooKeeperInstance(new ClientConfiguration().withInstance(tokens[1]).withZkHosts(tokens[2]));
- Connector conn = zki.getConnector(tokens[3], new PasswordToken(tokens[4]));
- if (conn.tableOperations().exists(tokens[5])) {
- ars = new ARS(conn, tokens[5]);
- reader.println(" connected");
- } else
- reader.println(" No Such Table");
- } else {
- System.out.println(" Commands : ");
- if (ars == null) {
- reader.println(" connect <instance> <zookeepers> <user> <pass> <table>");
- } else {
- reader.println(" reserve <what> <when> <who> {who}");
- reader.println(" cancel <what> <when> <who>");
- reader.println(" list <what> <when>");
- }
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java
deleted file mode 100644
index 262e63d..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.accumulo.examples.simple.sample;
-
-import java.util.Collections;
-import java.util.Map.Entry;
-
-import org.apache.accumulo.core.cli.BatchWriterOpts;
-import org.apache.accumulo.core.cli.ClientOnDefaultTable;
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.SampleNotPresentException;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.admin.CompactionConfig;
-import org.apache.accumulo.core.client.admin.CompactionStrategyConfig;
-import org.apache.accumulo.core.client.sample.RowSampler;
-import org.apache.accumulo.core.client.sample.SamplerConfiguration;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.examples.simple.client.RandomBatchWriter;
-import org.apache.accumulo.examples.simple.shard.CutoffIntersectingIterator;
-
-import com.google.common.collect.ImmutableMap;
-
-/**
- * A simple example of using Accumulo's sampling feature. This example does something similar to what README.sample shows using the shell. Also see
- * {@link CutoffIntersectingIterator} and README.sample for an example of how to use sample data from within an iterator.
- */
-public class SampleExample {
-
- // a compaction strategy that only selects files for compaction that have no sample data or sample data created in a different way than the tables
- static final CompactionStrategyConfig NO_SAMPLE_STRATEGY = new CompactionStrategyConfig(
- "org.apache.accumulo.tserver.compaction.strategies.ConfigurableCompactionStrategy").setOptions(Collections.singletonMap("SF_NO_SAMPLE", ""));
-
- // Command-line options; the table name defaults to "sampex" when not supplied.
- static class Opts extends ClientOnDefaultTable {
- public Opts() {
- super("sampex");
- }
- }
-
- // Walks Accumulo's sampling feature end to end: writes documents, enables a row
- // sampler, shows that pre-sampler data fails sample scans, compacts to build the
- // sample, then switches the sampler configuration and repeats the exercise.
- public static void main(String[] args) throws Exception {
- Opts opts = new Opts();
- BatchWriterOpts bwOpts = new BatchWriterOpts();
- opts.parseArgs(RandomBatchWriter.class.getName(), args, bwOpts);
-
- Connector conn = opts.getConnector();
-
- // only run against a fresh table; pre-existing data would confuse the demo output
- if (!conn.tableOperations().exists(opts.getTableName())) {
- conn.tableOperations().create(opts.getTableName());
- } else {
- System.out.println("Table exists, not doing anything.");
- return;
- }
-
- // write some data
- // NOTE(review): bw and scanner are never closed on exception paths — acceptable for a demo
- BatchWriter bw = conn.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
- bw.addMutation(createMutation("9225", "abcde", "file://foo.txt"));
- bw.addMutation(createMutation("8934", "accumulo scales", "file://accumulo_notes.txt"));
- bw.addMutation(createMutation("2317", "milk, eggs, bread, parmigiano-reggiano", "file://groceries/9/txt"));
- bw.addMutation(createMutation("3900", "EC2 ate my homework", "file://final_project.txt"));
- bw.flush();
-
- // sample roughly 1 of every 3 rows using a murmur3_32 hash of the row id
- SamplerConfiguration sc1 = new SamplerConfiguration(RowSampler.class.getName());
- sc1.setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "3"));
-
- conn.tableOperations().setSamplerConfiguration(opts.getTableName(), sc1);
-
- Scanner scanner = conn.createScanner(opts.getTableName(), Authorizations.EMPTY);
- System.out.println("Scanning all data :");
- print(scanner);
- System.out.println();
-
- System.out.println("Scanning with sampler configuration. Data was written before sampler was set on table, scan should fail.");
- scanner.setSamplerConfiguration(sc1);
- try {
- print(scanner);
- } catch (SampleNotPresentException e) {
- // expected: the existing files were written before the sampler was configured
- System.out.println(" Saw sample not present exception as expected.");
- }
- System.out.println();
-
- // compact table to recreate sample data
- conn.tableOperations().compact(opts.getTableName(), new CompactionConfig().setCompactionStrategy(NO_SAMPLE_STRATEGY));
-
- System.out.println("Scanning after compaction (compaction should have created sample data) : ");
- print(scanner);
- System.out.println();
-
- // update a document in the sample data
- bw.addMutation(createMutation("2317", "milk, eggs, bread, parmigiano-reggiano, butter", "file://groceries/9/txt"));
- bw.close();
- System.out.println("Scanning sample after updating content for docId 2317 (should see content change in sample data) : ");
- print(scanner);
- System.out.println();
-
- // change tables sampling configuration...
- SamplerConfiguration sc2 = new SamplerConfiguration(RowSampler.class.getName());
- sc2.setOptions(ImmutableMap.of("hasher", "murmur3_32", "modulus", "2"));
- conn.tableOperations().setSamplerConfiguration(opts.getTableName(), sc2);
- // compact table to recreate sample data using new configuration
- conn.tableOperations().compact(opts.getTableName(), new CompactionConfig().setCompactionStrategy(NO_SAMPLE_STRATEGY));
-
- System.out.println("Scanning with old sampler configuration. Sample data was created using new configuration with a compaction. Scan should fail.");
- try {
- // try scanning with old sampler configuration
- print(scanner);
- } catch (SampleNotPresentException e) {
- System.out.println(" Saw sample not present exception as expected ");
- }
- System.out.println();
-
- // update expected sampler configuration on scanner
- scanner.setSamplerConfiguration(sc2);
-
- System.out.println("Scanning with new sampler configuration : ");
- print(scanner);
- System.out.println();
-
- }
-
- // Prints every key/value the scanner currently returns, indented two spaces.
- private static void print(Scanner scanner) {
- for (Entry<Key,Value> entry : scanner) {
- System.out.println(" " + entry.getKey() + " " + entry.getValue());
- }
- }
-
- // Builds a document mutation: content under doc:context, the url under doc:url.
- // NOTE(review): qualifier "context" may be a typo for "content" — confirm against README.sample.
- private static Mutation createMutation(String docId, String content, String url) {
- Mutation m = new Mutation(docId);
- m.put("doc", "context", content);
- m.put("doc", "url", url);
- return m;
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java
deleted file mode 100644
index 604c851..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.simple.shard;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.accumulo.core.cli.BatchScannerOpts;
-import org.apache.accumulo.core.cli.ClientOpts;
-import org.apache.accumulo.core.client.BatchScanner;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.user.IntersectingIterator;
-import org.apache.hadoop.io.Text;
-
-import com.beust.jcommander.Parameter;
-import com.google.common.collect.Iterators;
-
-/**
- * Using the doc2word table created by Reverse.java, this program randomly selects N words per document. Then it continually queries a random set of words in
- * the shard table (created by {@link Index}) using the {@link IntersectingIterator}.
- *
- * See docs/examples/README.shard for instructions.
- */
-
-public class ContinuousQuery {
-
- // Command-line options for the shard-table query loop.
- static class Opts extends ClientOpts {
- @Parameter(names = "--shardTable", required = true, description = "name of the shard table")
- String tableName = null;
- @Parameter(names = "--doc2Term", required = true, description = "name of the doc2Term table")
- String doc2Term;
- @Parameter(names = "--terms", required = true, description = "the number of terms in the query")
- int numTerms;
- @Parameter(names = "--count", description = "the number of queries to run")
- long iterations = Long.MAX_VALUE;
- }
-
- // Repeatedly picks a random pre-selected term set and times an intersecting-iterator
- // query for it against the shard table, printing terms, hit count, and elapsed seconds.
- public static void main(String[] args) throws Exception {
- Opts opts = new Opts();
- BatchScannerOpts bsOpts = new BatchScannerOpts();
- opts.parseArgs(ContinuousQuery.class.getName(), args, bsOpts);
-
- Connector conn = opts.getConnector();
-
- // pre-compute candidate term sets: numTerms random words per document in doc2Term
- ArrayList<Text[]> randTerms = findRandomTerms(conn.createScanner(opts.doc2Term, opts.auths), opts.numTerms);
-
- Random rand = new Random();
-
- // NOTE(review): bs is not closed if an exception escapes the loop below
- BatchScanner bs = conn.createBatchScanner(opts.tableName, opts.auths, bsOpts.scanThreads);
- bs.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);
-
- for (long i = 0; i < opts.iterations; i += 1) {
- Text[] columns = randTerms.get(rand.nextInt(randTerms.size()));
-
- // reset iterator/column state left over from the previous iteration
- bs.clearScanIterators();
- bs.clearColumns();
-
- // the intersecting iterator returns only documents containing all chosen terms
- IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
- IntersectingIterator.setColumnFamilies(ii, columns);
- bs.addScanIterator(ii);
- bs.setRanges(Collections.singleton(new Range()));
-
- long t1 = System.currentTimeMillis();
- int count = Iterators.size(bs.iterator());
- long t2 = System.currentTimeMillis();
-
- System.out.printf(" %s %,d %6.3f%n", Arrays.asList(columns), count, (t2 - t1) / 1000.0);
- }
-
- bs.close();
-
- }
-
- // Scans the doc2Term table (row = document, column family = term) and, for each
- // document that has at least numTerms terms, selects numTerms of them at random.
- private static ArrayList<Text[]> findRandomTerms(Scanner scanner, int numTerms) {
-
- Text currentRow = null;
-
- ArrayList<Text> words = new ArrayList<>();
- ArrayList<Text[]> ret = new ArrayList<>();
-
- Random rand = new Random();
-
- for (Entry<Key,Value> entry : scanner) {
- Key key = entry.getKey();
-
- if (currentRow == null)
- currentRow = key.getRow();
-
- // row changed: finish off the previous document's word list
- if (!currentRow.equals(key.getRow())) {
- selectRandomWords(words, ret, rand, numTerms);
- words.clear();
- currentRow = key.getRow();
- }
-
- words.add(key.getColumnFamily());
-
- }
-
- // flush the final document's words
- selectRandomWords(words, ret, rand, numTerms);
-
- return ret;
- }
-
- // Shuffles a document's words and appends the first numTerms of them to ret;
- // documents with fewer than numTerms words are skipped.
- private static void selectRandomWords(ArrayList<Text> words, ArrayList<Text[]> ret, Random rand, int numTerms) {
- if (words.size() >= numTerms) {
- Collections.shuffle(words, rand);
- Text docWords[] = new Text[numTerms];
- for (int i = 0; i < docWords.length; i++) {
- docWords[i] = words.get(i);
- }
-
- ret.add(docWords);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8e0f19a1/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java
deleted file mode 100644
index f5dce1d..0000000
--- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.accumulo.examples.simple.shard;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static java.util.Objects.requireNonNull;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.sample.RowColumnSampler;
-import org.apache.accumulo.core.client.sample.SamplerConfiguration;
-import org.apache.accumulo.core.data.ByteSequence;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.IteratorEnvironment;
-import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
-import org.apache.accumulo.core.iterators.user.IntersectingIterator;
-
-/**
- * This iterator uses a sample built from the Column Qualifier to quickly avoid intersecting iterator queries that may return too many documents.
- */
-
-public class CutoffIntersectingIterator extends IntersectingIterator {
-
- // intersecting iterator that runs over the table's sample data
- private IntersectingIterator sampleII;
- // max sample entries allowed before the full query is cut off (cutoff / sample modulus)
- private int sampleMax;
- // false when the sample probe decided the result set is too large; masks super.hasTop()
- private boolean hasTop;
-
- // Stores the cutoff (max estimated matching documents) as an iterator option.
- public static void setCutoff(IteratorSetting iterCfg, int cutoff) {
- checkArgument(cutoff >= 0);
- iterCfg.addOption("cutoff", cutoff + "");
- }
-
- @Override
- public boolean hasTop() {
- // also report no data when the sample check exceeded the cutoff in seek()
- return hasTop && super.hasTop();
- }
-
- @Override
- public void seek(Range range, Collection<ByteSequence> seekColumnFamilies, boolean inclusive) throws IOException {
-
- // probe the sample first: count matches, stopping once sampleMax is exceeded
- sampleII.seek(range, seekColumnFamilies, inclusive);
-
- // this check will be redone whenever iterator stack is torn down and recreated.
- int count = 0;
- while (count <= sampleMax && sampleII.hasTop()) {
- sampleII.next();
- count++;
- }
-
- if (count > sampleMax) {
- // In a real application would probably want to return a key value that indicates too much data. Since this would execute for each tablet, some tablets
- // may return data. For tablets that did not return data, would want an indication.
- hasTop = false;
- } else {
- hasTop = true;
- super.seek(range, seekColumnFamilies, inclusive);
- }
- }
-
- @Override
- public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
- super.init(source, options, env);
-
- // deep copy the source against a sampling-enabled environment so sampleII reads sample data
- IteratorEnvironment sampleEnv = env.cloneWithSamplingEnabled();
-
- setMax(sampleEnv, options);
-
- SortedKeyValueIterator<Key,Value> sampleDC = source.deepCopy(sampleEnv);
- sampleII = new IntersectingIterator();
- // NOTE(review): init is passed env rather than sampleEnv; the sampling behavior comes
- // from the deep-copied source above — confirm this is intentional
- sampleII.init(sampleDC, options, env);
-
- }
-
- // Fails fast unless the table's sample is built on the column qualifier only, using
- // RowColumnSampler; otherwise the sample-based cutoff estimate would be meaningless.
- static void validateSamplerConfig(SamplerConfiguration sampleConfig) {
- requireNonNull(sampleConfig);
- checkArgument(sampleConfig.getSamplerClassName().equals(RowColumnSampler.class.getName()), "Unexpected Sampler " + sampleConfig.getSamplerClassName());
- checkArgument(sampleConfig.getOptions().get("qualifier").equals("true"), "Expected sample on column qualifier");
- checkArgument(isNullOrFalse(sampleConfig.getOptions(), "row", "family", "visibility"), "Expected sample on column qualifier only");
- }
-
- // Derives sampleMax by scaling the configured cutoff down by the sample modulus.
- private void setMax(IteratorEnvironment sampleEnv, Map<String,String> options) {
- String cutoffValue = options.get("cutoff");
- SamplerConfiguration sampleConfig = sampleEnv.getSamplerConfiguration();
-
- // Ensure the sample was constructed in an expected way. If the sample is not built as expected, then can not draw conclusions based on sample.
- requireNonNull(cutoffValue, "Expected cutoff option is missing");
- validateSamplerConfig(sampleConfig);
-
- int modulus = Integer.parseInt(sampleConfig.getOptions().get("modulus"));
-
- sampleMax = Math.round(Float.parseFloat(cutoffValue) / modulus);
- }
-
- // True iff none of the given keys map to the string "true" in options.
- private static boolean isNullOrFalse(Map<String,String> options, String... keys) {
- for (String key : keys) {
- String val = options.get(key);
- if (val != null && val.equals("true")) {
- return false;
- }
- }
- return true;
- }
-}