You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ec...@apache.org on 2012/01/12 17:06:20 UTC
svn commit: r1230608 [3/16] - in /incubator/accumulo/trunk: ./
contrib/accumulo_sample/ src/assemble/ src/core/
src/core/src/main/java/org/apache/accumulo/core/client/impl/thrift/
src/core/src/main/java/org/apache/accumulo/core/master/thrift/ src/core/...
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.helloworld;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.MultiTableBatchWriter;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.Text;
+
+public class InsertWithBatchWriter {
+
+  /**
+   * Builds the mutation for a single row: five entries in the given column family with qualifiers colqual_0 .. colqual_4 and values value_&lt;row&gt;_&lt;column&gt;.
+   */
+  private static Mutation rowMutation(Text family, int rowNum) {
+    Mutation mutation = new Mutation(new Text(String.format("row_%d", rowNum)));
+    int column = 0;
+    while (column < 5) {
+      Text qualifier = new Text(String.format("colqual_%d", column));
+      byte[] cell = (String.format("value_%d_%d", rowNum, column)).getBytes();
+      mutation.put(family, qualifier, new Value(cell));
+      column++;
+    }
+    return mutation;
+  }
+
+  /**
+   * Writes 10000 rows into the named table through a MultiTableBatchWriter, creating the table first when it does not exist.
+   *
+   * @param args
+   *          instance name, zookeepers, table name, username, password (exactly five values)
+   */
+  public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, MutationsRejectedException, TableExistsException,
+      TableNotFoundException {
+    if (args.length != 5) {
+      System.out
+          .println("Usage: bin/tool.sh accumulo-examplesjar accumulo.examples.helloworld.InsertWithBatchWriter <instance name> <zoo keepers> <tableName> <username> <password>");
+      System.exit(1);
+    }
+
+    String table = args[2];
+    byte[] password = args[4].getBytes();
+
+    Connector conn = new ZooKeeperInstance(args[0], args[1]).getConnector(args[3], password);
+    MultiTableBatchWriter multiWriter = conn.createMultiTableBatchWriter(200000l, 300, 4);
+
+    // make sure the destination exists before asking for a writer on it
+    boolean present = conn.tableOperations().exists(table);
+    if (!present) {
+      conn.tableOperations().create(table);
+    }
+    BatchWriter writer = multiWriter.getBatchWriter(table);
+
+    Text family = new Text("colfam");
+    System.out.println("writing ...");
+    int rowNum = 0;
+    while (rowNum < 10000) {
+      writer.addMutation(rowMutation(family, rowNum));
+      // progress marker every hundred rows
+      if (rowNum % 100 == 0) {
+        System.out.println(rowNum);
+      }
+      rowNum++;
+    }
+
+    // closing the multi-table writer is the only close the example performs
+    multiWriter.close();
+  }
+
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithOutputFormat.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithOutputFormat.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithOutputFormat.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.helloworld;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class InsertWithOutputFormat extends Configured implements Tool {
+ // this is a tool because when you run a mapreduce, you will need to use the
+ // ToolRunner
+ // if you want libjars to be passed properly to the map and reduce tasks
+ // even though this class isn't a mapreduce
+ @Override
+ public int run(String[] args) throws Exception {
+ if (args.length != 5) {
+ System.out.println("Usage: accumulo " + this.getClass().getName() + " <instance name> <zoo keepers> <tablename> <username> <password>");
+ return 1;
+ }
+ Text tableName = new Text(args[2]);
+ Job job = new Job(getConf());
+ Configuration conf = job.getConfiguration();
+ AccumuloOutputFormat.setZooKeeperInstance(job, args[0], args[1]);
+ AccumuloOutputFormat.setOutputInfo(job, args[3], args[4].getBytes(), true, null);
+ job.setOutputFormatClass(AccumuloOutputFormat.class);
+
+ // when running a mapreduce, you won't need to instantiate the output
+ // format and record writer
+ // mapreduce will do that for you, and you will just use
+ // output.collect(tableName, mutation)
+ TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
+ RecordWriter<Text,Mutation> rw = new AccumuloOutputFormat().getRecordWriter(context);
+
+ Text colf = new Text("colfam");
+ System.out.println("writing ...");
+ for (int i = 0; i < 10000; i++) {
+ Mutation m = new Mutation(new Text(String.format("row_%d", i)));
+ for (int j = 0; j < 5; j++) {
+ m.put(colf, new Text(String.format("colqual_%d", j)), new Value((String.format("value_%d_%d", i, j)).getBytes()));
+ }
+ rw.write(tableName, m); // repeat until done
+ if (i % 100 == 0)
+ System.out.println(i);
+ }
+
+ rw.close(context); // close when done
+ return 0;
+ }
+
+ public static void main(String[] args) throws Exception {
+ System.exit(ToolRunner.run(CachedConfiguration.getInstance(), new InsertWithOutputFormat(), args));
+ }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithOutputFormat.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.helloworld;
+
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.Text;
+
+public class ReadData {
+
+  /**
+   * Scans a table and prints the row, column family, column qualifier and value of every entry, optionally restricted to a start key and an end key.
+   *
+   * @param args
+   *          instance name, zookeepers, table name, username, password, then an optional start key and an optional end key
+   */
+  public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
+    boolean argCountOk = args.length >= 5 && args.length <= 7;
+    if (!argCountOk) {
+      System.out
+          .println("bin/accumulo accumulo.examples.helloworld.ReadData <instance name> <zoo keepers> <tablename> <username> <password> [startkey [endkey]]");
+      System.exit(1);
+    }
+
+    byte[] password = args[4].getBytes();
+    Connector conn = new ZooKeeperInstance(args[0], args[1]).getConnector(args[3], password);
+
+    Scanner scanner = conn.createScanner(args[2], Constants.NO_AUTHS);
+
+    // a missing bound stays null, which is passed straight through to the Range
+    Key first = args.length > 5 ? new Key(new Text(args[5])) : null;
+    Key last = args.length > 6 ? new Key(new Text(args[6])) : null;
+    scanner.setRange(new Range(first, last));
+
+    for (Entry<Key,Value> entry : scanner) {
+      Key key = entry.getKey();
+      Text family = key.getColumnFamily();
+      Text qualifier = key.getColumnQualifier();
+      System.out.print("row: " + key.getRow() + ", colf: " + family + ", colq: " + qualifier);
+      System.out.println(", value: " + entry.getValue().toString());
+    }
+  }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.isolation;
+
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IsolatedScanner;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.Text;
+import org.apache.log4j.Logger;
+
+/**
+ * This example shows how a concurrent reader and writer can interfere with each other. It creates two threads that read from and write to the same table:
+ * the writer runs for the requested number of iterations (effectively forever when fewer than one is requested) and the reader keeps scanning until the
+ * writer has finished.
+ *
+ * When the example is run with isolation enabled, no interference will be observed.
+ *
+ * When the example is run without isolation, the reader will see partial mutations of a row.
+ *
+ */
+
+public class InterferenceTest {
+
+  private static final int NUM_ROWS = 500;
+  private static final int NUM_COLUMNS = 113; // scanner batches 1000 by default, so make num columns not a multiple of 10
+  // number of mutations the writer sends; set once by main before the threads are started
+  private static long iterations;
+  private static final Logger log = Logger.getLogger(InterferenceTest.class);
+
+  /**
+   * Repeatedly rewrites whole rows. Each mutation sets all NUM_COLUMNS columns of one row to the same value, so a reader that sees a row atomically should
+   * only ever find a single distinct value in it.
+   */
+  static class Writer implements Runnable {
+
+    private final BatchWriter bw;
+
+    Writer(BatchWriter bw) {
+      this.bw = bw;
+    }
+
+    @Override
+    public void run() {
+      int row = 0;
+      int value = 0;
+
+      for (long i = 0; i < iterations; i++) {
+        Mutation m = new Mutation(new Text(String.format("%03d", row)));
+        // cycle through the rows so every row is overwritten again and again
+        row = (row + 1) % NUM_ROWS;
+
+        for (int cq = 0; cq < NUM_COLUMNS; cq++)
+          m.put(new Text("000"), new Text(String.format("%04d", cq)), new Value(("" + value).getBytes()));
+
+        // a new value per mutation makes a partially applied mutation visible as mixed values
+        value++;
+
+        try {
+          bw.addMutation(m);
+        } catch (MutationsRejectedException e) {
+          e.printStackTrace();
+          System.exit(-1);
+        }
+      }
+      try {
+        bw.close();
+      } catch (MutationsRejectedException e) {
+        log.error(e, e);
+      }
+    }
+  }
+
+  /**
+   * Scans the table over and over until told to stop, reporting every row that does not have exactly NUM_COLUMNS columns or whose columns hold more than one
+   * distinct value.
+   */
+  static class Reader implements Runnable {
+
+    private final Scanner scanner;
+    // volatile so that the request made through stopNow() on another thread is seen by the scanning loop
+    volatile boolean stop = false;
+
+    Reader(Scanner scanner) {
+      this.scanner = scanner;
+    }
+
+    @Override
+    public void run() {
+      // keep scanning until stopNow() is called; the original condition "while (stop)" was inverted,
+      // which made the reader exit immediately without performing a single scan
+      while (!stop) {
+        ByteSequence row = null;
+        int count = 0;
+
+        // all columns in a row should have the same value,
+        // use this hash set to track that
+        HashSet<String> values = new HashSet<String>();
+
+        for (Entry<Key,Value> entry : scanner) {
+          if (row == null)
+            row = entry.getKey().getRowData();
+
+          // the row changed: check the row that just ended, then start counting the new one
+          if (!row.equals(entry.getKey().getRowData())) {
+            if (count != NUM_COLUMNS)
+              System.err.println("ERROR Did not see " + NUM_COLUMNS + " columns in row " + row);
+
+            if (values.size() > 1)
+              System.err.println("ERROR Columns in row " + row + " had multiple values " + values);
+
+            row = entry.getKey().getRowData();
+            count = 0;
+            values.clear();
+          }
+
+          count++;
+
+          values.add(entry.getValue().toString());
+        }
+
+        // check the last row of the scan; count is 0 only when the scan returned nothing
+        if (count > 0 && count != NUM_COLUMNS)
+          System.err.println("ERROR Did not see " + NUM_COLUMNS + " columns in row " + row);
+
+        if (values.size() > 1)
+          System.err.println("ERROR Columns in row " + row + " had multiple values " + values);
+      }
+    }
+
+    /**
+     * Asks the reader to finish; it stops once the scan currently in progress has completed.
+     */
+    public void stopNow() {
+      stop = true;
+    }
+  }
+
+  /**
+   * Starts one writer thread and one reader thread against the same table, waits for the writer to finish, then stops the reader.
+   *
+   * @param args
+   *          instance name, zookeepers, user, password, table, iterations, and true|false to select isolated scans (exactly seven values)
+   */
+  public static void main(String[] args) throws Exception {
+
+    if (args.length != 7) {
+      System.out.println("Usage : " + InterferenceTest.class.getName() + " <instance name> <zookeepers> <user> <password> <table> <iterations> true|false");
+      System.out.println(" The last argument determines if scans should be isolated. When false, expect to see errors");
+      return;
+    }
+
+    ZooKeeperInstance zki = new ZooKeeperInstance(args[0], args[1]);
+    Connector conn = zki.getConnector(args[2], args[3].getBytes());
+
+    String table = args[4];
+    iterations = Long.parseLong(args[5]);
+    // fewer than one iteration means "run (practically) forever"
+    if (iterations < 1)
+      iterations = Long.MAX_VALUE;
+    if (!conn.tableOperations().exists(table))
+      conn.tableOperations().create(table);
+
+    Thread writer = new Thread(new Writer(conn.createBatchWriter(table, 10000000, 60000l, 3)));
+    writer.start();
+    Reader r;
+    if (Boolean.parseBoolean(args[6]))
+      r = new Reader(new IsolatedScanner(conn.createScanner(table, Constants.NO_AUTHS)));
+    else
+      r = new Reader(conn.createScanner(table, Constants.NO_AUTHS));
+    Thread reader;
+    reader = new Thread(r);
+    reader.start();
+    // the reader only has to run for as long as the writer is producing mutations
+    writer.join();
+    r.stopNow();
+    reader.join();
+    System.out.println("finished");
+  }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.RegExFilter;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class RegexExample extends Configured implements Tool {
+
+  /**
+   * Identity mapper: every key/value pair handed to it (the entries that passed the regex filter configured on the input format) is written to the output
+   * unchanged.
+   */
+  public static class RegexMapper extends Mapper<Key,Value,Key,Value> {
+    @Override
+    public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
+      context.write(row, data);
+    }
+  }
+
+  /**
+   * Configures and runs a map-only job that reads a table through a RegExFilter and writes the matching entries as text files.
+   *
+   * @param args
+   *          instance name, zookeepers, username, password, table name, row regex, column family regex, column qualifier regex, value regex, output
+   *          directory
+   * @return 0 when the job succeeds, 1 when it fails or when too few arguments are supplied
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    // args[0] .. args[9] are all read below; fail with a usage message instead of an ArrayIndexOutOfBoundsException
+    if (args.length < 10) {
+      System.out.println("Usage: accumulo " + this.getClass().getName()
+          + " <instance name> <zoo keepers> <username> <password> <tablename> <row regex> <column family regex> <column qualifier regex> <value regex> <output directory>");
+      return 1;
+    }
+
+    Job job = new Job(getConf(), this.getClass().getSimpleName());
+    job.setJarByClass(this.getClass());
+
+    job.setInputFormatClass(AccumuloInputFormat.class);
+    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
+    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[4], new Authorizations());
+
+    // the four regexes are passed in the order row, column family, column qualifier, value (see the printout below)
+    // NOTE(review): the trailing false presumably selects AND rather than OR matching of the fields -- confirm against RegExFilter.setRegexs
+    IteratorSetting regex = new IteratorSetting(50, "regex", RegExFilter.class);
+    RegExFilter.setRegexs(regex, args[5], args[6], args[7], args[8], false);
+    AccumuloInputFormat.addIterator(job.getConfiguration(), regex);
+
+    job.setMapperClass(RegexMapper.class);
+    job.setMapOutputKeyClass(Key.class);
+    job.setMapOutputValueClass(Value.class);
+
+    // map-only job: the mapper output goes straight to the output format
+    job.setNumReduceTasks(0);
+
+    job.setOutputFormatClass(TextOutputFormat.class);
+    TextOutputFormat.setOutputPath(job, new Path(args[9]));
+
+    System.out.println("setRowRegex: " + args[5]);
+    System.out.println("setColumnFamilyRegex: " + args[6]);
+    System.out.println("setColumnQualifierRegex: " + args[7]);
+    System.out.println("setValueRegex: " + args[8]);
+
+    job.waitForCompletion(true);
+    return job.isSuccessful() ? 0 : 1;
+  }
+
+  /**
+   * Launches this tool through the ToolRunner and exits with a non-zero status when the tool reports failure.
+   */
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(CachedConfiguration.getInstance(), new RegexExample(), args);
+    if (res != 0)
+      System.exit(res);
+  }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class RowHash extends Configured implements Tool {
+  /**
+   * The Mapper class that, for every entry it is given, emits a mutation on the same row storing the Base64-encoded MD5 hash of the entry's value under
+   * column family cf-HASHTYPE and column qualifier cq-MD5BASE64.
+   */
+  public static class HashDataMapper extends Mapper<Key,Value,Text,Mutation> {
+    @Override
+    public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
+      Mutation m = new Mutation(row.getRow());
+      m.put(new Text("cf-HASHTYPE"), new Text("cq-MD5BASE64"), new Value(Base64.encodeBase64(MD5Hash.digest(data.toString()).getDigest())));
+      // NOTE(review): a null key presumably sends the mutation to the default table configured through setOutputInfo in run() -- confirm against
+      // AccumuloOutputFormat
+      context.write(null, m);
+      context.progress();
+    }
+
+    @Override
+    public void setup(Context job) {}
+  }
+
+  /**
+   * Configures and runs a map-only job that reads one column of the input table and writes a hash of each value to the output table.
+   *
+   * @param args
+   *          instance name, zookeepers, username, password, input table name, column (family or family:qualifier), output table name
+   * @return 0 when the job succeeds, 1 when it fails or when too few arguments are supplied
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    // args[0] .. args[6] are all read below; fail with a usage message instead of an ArrayIndexOutOfBoundsException
+    if (args.length < 7) {
+      System.out.println("Usage: accumulo " + this.getClass().getName()
+          + " <instance name> <zoo keepers> <username> <password> <input tablename> <column> <output tablename>");
+      return 1;
+    }
+
+    Job job = new Job(getConf(), this.getClass().getName());
+    job.setJarByClass(this.getClass());
+
+    job.setInputFormatClass(AccumuloInputFormat.class);
+    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
+    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[4], new Authorizations());
+
+    // the column argument is "family" or "family:qualifier"; without a colon the qualifier is left null
+    String col = args[5];
+    int idx = col.indexOf(":");
+    Text cf = new Text(idx < 0 ? col : col.substring(0, idx));
+    Text cq = idx < 0 ? null : new Text(col.substring(idx + 1));
+    // an empty family means no column restriction is configured at all
+    if (cf.getLength() > 0)
+      AccumuloInputFormat.fetchColumns(job.getConfiguration(), Collections.singleton(new Pair<Text,Text>(cf, cq)));
+
+    // AccumuloInputFormat.setLogLevel(job, Level.TRACE);
+
+    job.setMapperClass(HashDataMapper.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(Mutation.class);
+
+    // map-only job: the mutations go straight to the output format
+    job.setNumReduceTasks(0);
+
+    job.setOutputFormatClass(AccumuloOutputFormat.class);
+    AccumuloOutputFormat.setZooKeeperInstance(job, args[0], args[1]);
+    AccumuloOutputFormat.setOutputInfo(job, args[2], args[3].getBytes(), true, args[6]);
+    // AccumuloOutputFormat.setLogLevel(job, Level.TRACE);
+
+    job.waitForCompletion(true);
+    return job.isSuccessful() ? 0 : 1;
+  }
+
+  /**
+   * Launches this tool through the ToolRunner and exits with a non-zero status when the tool reports failure.
+   */
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(CachedConfiguration.getInstance(), new RowHash(), args);
+    if (res != 0)
+      System.exit(res);
+  }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.accumulo.core.util.format.DefaultFormatter;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Takes a table and outputs the specified columns to a set of part files on hdfs.
+ *
+ * Arguments: &lt;instance name&gt; &lt;zookeepers&gt; &lt;username&gt; &lt;password&gt; &lt;tablename&gt; &lt;columns&gt; &lt;hdfs-output-path&gt;, where columns is
+ * a comma separated list of family or family:qualifier entries.
+ */
+public class TableToFile extends Configured implements Tool {
+  /**
+   * The Mapper class that formats each key/value pair it is given with the DefaultFormatter and writes the resulting text line to the output.
+   */
+  public static class TTFMapper extends Mapper<Key,Value,NullWritable,Text> {
+    @Override
+    public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
+      final Key r = row;
+      final Value v = data;
+      // DefaultFormatter.formatEntry takes a Map.Entry, so wrap the pair in a minimal read-only entry
+      Map.Entry<Key,Value> entry = new Map.Entry<Key,Value>() {
+        @Override
+        public Key getKey() {
+          return r;
+        }
+
+        @Override
+        public Value getValue() {
+          return v;
+        }
+
+        // the entry is never modified; setValue is a no-op
+        @Override
+        public Value setValue(Value value) {
+          return null;
+        }
+      };
+      context.write(NullWritable.get(), new Text(DefaultFormatter.formatEntry(entry, false)));
+      context.setStatus("Outputed Value");
+    }
+  }
+
+  /**
+   * Configures and runs a map-only job that dumps the requested columns of a table to text files.
+   *
+   * @param args
+   *          instance name, zookeepers, username, password, table name, columns, output path
+   * @return 0 when the job succeeds, 1 when it fails or when too few arguments are supplied
+   */
+  @Override
+  public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
+    // args[0] .. args[6] are all read below; fail with a usage message instead of an ArrayIndexOutOfBoundsException
+    if (args.length < 7) {
+      System.out.println("Usage: accumulo " + this.getClass().getName()
+          + " <instance name> <zoo keepers> <username> <password> <tablename> <columns> <hdfs-output-path>");
+      return 1;
+    }
+
+    Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+    job.setJarByClass(this.getClass());
+
+    job.setInputFormatClass(AccumuloInputFormat.class);
+    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
+    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[4], new Authorizations());
+
+    // each comma separated entry is "family" or "family:qualifier"; without a colon the qualifier is left null
+    HashSet<Pair<Text,Text>> columnsToFetch = new HashSet<Pair<Text,Text>>();
+    for (String col : args[5].split(",")) {
+      int idx = col.indexOf(":");
+      Text cf = new Text(idx < 0 ? col : col.substring(0, idx));
+      Text cq = idx < 0 ? null : new Text(col.substring(idx + 1));
+      // entries with an empty family are ignored
+      if (cf.getLength() > 0)
+        columnsToFetch.add(new Pair<Text,Text>(cf, cq));
+    }
+    // when nothing usable was listed, no column restriction is configured at all
+    if (!columnsToFetch.isEmpty())
+      AccumuloInputFormat.fetchColumns(job.getConfiguration(), columnsToFetch);
+
+    job.setMapperClass(TTFMapper.class);
+    job.setMapOutputKeyClass(NullWritable.class);
+    job.setMapOutputValueClass(Text.class);
+
+    // map-only job: the formatted lines go straight to the output format
+    job.setNumReduceTasks(0);
+
+    job.setOutputFormatClass(TextOutputFormat.class);
+    TextOutputFormat.setOutputPath(job, new Path(args[6]));
+
+    job.waitForCompletion(true);
+    return job.isSuccessful() ? 0 : 1;
+  }
+
+  /**
+   * Launches this tool through the ToolRunner and exits with a non-zero status when the tool reports failure.
+   *
+   * @param args
+   *          instanceName zookeepers username password table columns outputpath
+   * @throws Exception
+   *           if the ToolRunner or the job it starts fails
+   */
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(CachedConfiguration.getInstance(), new TableToFile(), args);
+    if (res != 0)
+      System.exit(res);
+  }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,377 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Generate the *almost* official terasort input data set. (See below) The user specifies the number of rows and the output directory and this class runs a
+ * map/reduce program to generate the data. The format of the data is:
+ * <ul>
+ * <li>(10 bytes key) (10 bytes rowid) (78 bytes filler) \r \n
+ * <li>The keys are random characters from the set ' ' .. '~'.
+ * <li>The rowid is the right justified row id as an int.
+ * <li>The filler consists of 7 runs of 10 characters from 'A' to 'Z'.
+ * </ul>
+ *
+ * This TeraSort is slightly modified to allow for variable length key sizes and value sizes. The row length isn't variable. To generate a terabyte of data in
+ * the same way TeraSort does use 10000000000 rows and 10/10 byte key length and 78/78 byte value length. Along with the 10 byte row id and \r\n this gives you
+ * 100 byte row * 10000000000 rows = 1tb. Min/Max ranges for key and value parameters are inclusive/inclusive respectively.
+ *
+ * Params <numrows> <minkeylength> <maxkeylength> <minvaluelength> <maxvaluelength> <tablename> <instance> <zoohosts> <username> <password> [numsplits]
+ * numsplits allows you to specify how many splits, and therefore mappers, to use
+ *
+ *
+ */
+public class TeraSortIngest extends Configured implements Tool {
+  /**
+   * An input format that assigns ranges of longs to each mapper.
+   */
+  static class RangeInputFormat extends InputFormat<LongWritable,NullWritable> {
+    /**
+     * An input split consisting of a range of numbers: rowCount consecutive rows starting at firstRow.
+     */
+    static class RangeInputSplit extends InputSplit implements Writable {
+      long firstRow;
+      long rowCount;
+
+      /** Creates an empty split whose fields are populated later by readFields. */
+      public RangeInputSplit() {}
+
+      /**
+       * @param offset
+       *          the first row of the range
+       * @param length
+       *          the number of rows in the range
+       */
+      public RangeInputSplit(long offset, long length) {
+        firstRow = offset;
+        rowCount = length;
+      }
+
+      @Override
+      public long getLength() throws IOException {
+        return 0;
+      }
+
+      @Override
+      public String[] getLocations() throws IOException {
+        return new String[] {};
+      }
+
+      @Override
+      public void readFields(DataInput in) throws IOException {
+        firstRow = WritableUtils.readVLong(in);
+        rowCount = WritableUtils.readVLong(in);
+      }
+
+      @Override
+      public void write(DataOutput out) throws IOException {
+        WritableUtils.writeVLong(out, firstRow);
+        WritableUtils.writeVLong(out, rowCount);
+      }
+    }
+
+    /**
+     * A record reader that will generate a range of numbers. The keys produced are startRow, startRow + 1, ... startRow + totalRows - 1; the value is always
+     * the NullWritable singleton.
+     */
+    static class RangeRecordReader extends RecordReader<LongWritable,NullWritable> {
+      long startRow;
+      long finishedRows;
+      long totalRows;
+
+      // Key of the record most recently produced by nextKeyValue(); null until the first successful call.
+      LongWritable currentKey;
+
+      public RangeRecordReader(RangeInputSplit split) {
+        startRow = split.firstRow;
+        finishedRows = 0;
+        totalRows = split.rowCount;
+      }
+
+      @Override
+      public void close() throws IOException {}
+
+      @Override
+      public float getProgress() throws IOException {
+        // an empty split has nothing left to do; avoid reporting NaN from 0 / 0
+        if (totalRows <= 0) {
+          return 1.0f;
+        }
+        return finishedRows / (float) totalRows;
+      }
+
+      @Override
+      public LongWritable getCurrentKey() throws IOException, InterruptedException {
+        return currentKey;
+      }
+
+      @Override
+      public NullWritable getCurrentValue() throws IOException, InterruptedException {
+        return NullWritable.get();
+      }
+
+      @Override
+      public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {}
+
+      @Override
+      public boolean nextKeyValue() throws IOException, InterruptedException {
+        if (finishedRows < totalRows) {
+          // capture the key before advancing so the first record of the split is startRow itself, not startRow + 1
+          currentKey = new LongWritable(startRow + finishedRows);
+          ++finishedRows;
+          return true;
+        }
+        return false;
+      }
+    }
+
+    @Override
+    public RecordReader<LongWritable,NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
+      // reporter.setStatus("Creating record reader");
+      return new RangeRecordReader((RangeInputSplit) split);
+    }
+
+    /**
+     * Create the desired number of splits, dividing the number of rows between the mappers. The last split also receives any remainder left over by the
+     * integer division. A configured split count below 1 is treated as 1.
+     */
+    @Override
+    public List<InputSplit> getSplits(JobContext job) {
+      long totalRows = job.getConfiguration().getLong(NUMROWS, 0);
+      // guard against a zero or negative override, which would otherwise divide by zero below
+      int numSplits = Math.max(1, job.getConfiguration().getInt(NUMSPLITS, 1));
+      long rowsPerSplit = totalRows / numSplits;
+      System.out.println("Generating " + totalRows + " using " + numSplits + " maps with step of " + rowsPerSplit);
+      ArrayList<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
+      long currentRow = 0;
+      for (int split = 0; split < numSplits - 1; ++split) {
+        splits.add(new RangeInputSplit(currentRow, rowsPerSplit));
+        currentRow += rowsPerSplit;
+      }
+      splits.add(new RangeInputSplit(currentRow, totalRows - currentRow));
+      System.out.println("Done Generating.");
+      return splits;
+    }
+
+  }
+
+ // Configuration key for the number of input splits to create; read by RangeInputFormat.getSplits and set in run() from the optional numsplits argument.
+ private static String NUMSPLITS = "terasort.overridesplits";
+ // Configuration key for the total number of rows to generate; read by RangeInputFormat.getSplits and set in run() from the numrows argument.
+ private static String NUMROWS = "terasort.numrows";
+
+  /**
+   * A linear congruential generator with 32 bits of state that can be positioned at an arbitrary iteration by starting from a table of precomputed seeds.
+   */
+  static class RandomGenerator {
+    private long seed = 0;
+    private static final long mask32 = (1L << 32) - 1;
+    /**
+     * The number of iterations separating the precomputed seeds.
+     */
+    private static final int seedSkip = 128 * 1024 * 1024;
+    /**
+     * The precomputed seed values after every seedSkip iterations. There should be enough values so that a 2**32 iterations are covered.
+     */
+    private static final long[] seeds = new long[] {0L, 4160749568L, 4026531840L, 3892314112L, 3758096384L, 3623878656L, 3489660928L, 3355443200L, 3221225472L,
+        3087007744L, 2952790016L, 2818572288L, 2684354560L, 2550136832L, 2415919104L, 2281701376L, 2147483648L, 2013265920L, 1879048192L, 1744830464L,
+        1610612736L, 1476395008L, 1342177280L, 1207959552L, 1073741824L, 939524096L, 805306368L, 671088640L, 536870912L, 402653184L, 268435456L, 134217728L,};
+
+    /**
+     * Start the random number generator on the given iteration.
+     *
+     * @param initalIteration
+     *          the iteration number to start on
+     */
+    RandomGenerator(long initalIteration) {
+      // jump to the closest precomputed seed at or before the requested iteration ...
+      final long wrapped = initalIteration & mask32;
+      seed = seeds[(int) (wrapped / seedSkip)];
+      // ... then step forward the remaining iterations one at a time
+      for (long remaining = initalIteration % seedSkip; remaining > 0; --remaining) {
+        next();
+      }
+    }
+
+    /** Start the generator at iteration zero. */
+    RandomGenerator() {
+      this(0);
+    }
+
+    /**
+     * Advance the generator by one step.
+     *
+     * @return the new state, a value in the range 0 .. 2**32 - 1
+     */
+    long next() {
+      final long advanced = seed * 3141592621L + 663896637;
+      seed = advanced & mask32;
+      return seed;
+    }
+  }
+
+ /**
+ * The Mapper class that given a row number, will generate the appropriate output line.
+ */
+ public static class SortGenMapper extends Mapper<LongWritable,NullWritable,Text,Mutation> {
+ private Text table = null;
+ private int minkeylength = 0;
+ private int maxkeylength = 0;
+ private int minvaluelength = 0;
+ private int maxvaluelength = 0;
+
+ private Text key = new Text();
+ private Text value = new Text();
+ private RandomGenerator rand;
+ private byte[] keyBytes; // = new byte[12];
+ private byte[] spaces = " ".getBytes();
+ private byte[][] filler = new byte[26][];
+ {
+ for (int i = 0; i < 26; ++i) {
+ filler[i] = new byte[10];
+ for (int j = 0; j < 10; ++j) {
+ filler[i][j] = (byte) ('A' + i);
+ }
+ }
+ }
+
+ /**
+ * Add a random key to the text
+ */
+ private Random random = new Random();
+
+ private void addKey() {
+ int range = random.nextInt(maxkeylength - minkeylength + 1);
+ int keylen = range + minkeylength;
+ int keyceil = keylen + (4 - (keylen % 4));
+ keyBytes = new byte[keyceil];
+
+ long temp = 0;
+ for (int i = 0; i < keyceil / 4; i++) {
+ temp = rand.next() / 52;
+ keyBytes[3 + 4 * i] = (byte) (' ' + (temp % 95));
+ temp /= 95;
+ keyBytes[2 + 4 * i] = (byte) (' ' + (temp % 95));
+ temp /= 95;
+ keyBytes[1 + 4 * i] = (byte) (' ' + (temp % 95));
+ temp /= 95;
+ keyBytes[4 * i] = (byte) (' ' + (temp % 95));
+ }
+ key.set(keyBytes, 0, keylen);
+ }
+
+ /**
+ * Add the rowid to the row.
+ *
+ * @param rowId
+ */
+ private Text getRowIdString(long rowId) {
+ Text paddedRowIdString = new Text();
+ byte[] rowid = Integer.toString((int) rowId).getBytes();
+ int padSpace = 10 - rowid.length;
+ if (padSpace > 0) {
+ paddedRowIdString.append(spaces, 0, 10 - rowid.length);
+ }
+ paddedRowIdString.append(rowid, 0, Math.min(rowid.length, 10));
+ return paddedRowIdString;
+ }
+
+ /**
+ * Add the required filler bytes. Each row consists of 7 blocks of 10 characters and 1 block of 8 characters.
+ *
+ * @param rowId
+ * the current row number
+ */
+ private void addFiller(long rowId) {
+ int base = (int) ((rowId * 8) % 26);
+
+ // Get Random var
+ Random random = new Random(rand.seed);
+
+ int range = random.nextInt(maxvaluelength - minvaluelength + 1);
+ int valuelen = range + minvaluelength;
+
+ while (valuelen > 10) {
+ value.append(filler[(base + valuelen) % 26], 0, 10);
+ valuelen -= 10;
+ }
+
+ if (valuelen > 0)
+ value.append(filler[(base + valuelen) % 26], 0, valuelen);
+ }
+
+ public void map(LongWritable row, NullWritable ignored, Context context) throws IOException, InterruptedException {
+ context.setStatus("Entering");
+ long rowId = row.get();
+ if (rand == null) {
+ // we use 3 random numbers per a row
+ rand = new RandomGenerator(rowId * 3);
+ }
+ addKey();
+ value.clear();
+ // addRowId(rowId);
+ addFiller(rowId);
+
+ // New
+ Mutation m = new Mutation(key);
+ m.put(new Text("c"), // column family
+ getRowIdString(rowId), // column qual
+ new Value(value.toString().getBytes())); // data
+
+ context.setStatus("About to add to accumulo");
+ context.write(table, m);
+ context.setStatus("Added to accumulo " + key.toString());
+ }
+
+ @Override
+ public void setup(Context job) {
+ minkeylength = job.getConfiguration().getInt("cloudgen.minkeylength", 0);
+ maxkeylength = job.getConfiguration().getInt("cloudgen.maxkeylength", 0);
+ minvaluelength = job.getConfiguration().getInt("cloudgen.minvaluelength", 0);
+ maxvaluelength = job.getConfiguration().getInt("cloudgen.maxvaluelength", 0);
+ table = new Text(job.getConfiguration().get("cloudgen.tablename"));
+ }
+ }
+
+  /**
+   * Runs the TeraSortIngest tool through ToolRunner and exits the JVM with the status it returns.
+   */
+  public static void main(String[] args) throws Exception {
+    final Configuration conf = CachedConfiguration.getInstance();
+    final int status = ToolRunner.run(conf, new TeraSortIngest(), args);
+    System.exit(status);
+  }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ Job job = new Job(getConf(), "TeraSortCloud");
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormatClass(RangeInputFormat.class);
+ job.setMapperClass(SortGenMapper.class);
+ job.setMapOutputKeyClass(Text.class);
+ job.setMapOutputValueClass(Mutation.class);
+
+ job.setNumReduceTasks(0);
+
+ job.setOutputFormatClass(AccumuloOutputFormat.class);
+ AccumuloOutputFormat.setZooKeeperInstance(job, args[6], args[7]);
+ AccumuloOutputFormat.setOutputInfo(job, args[8], args[9].getBytes(), true, null);
+ AccumuloOutputFormat.setMaxMutationBufferSize(job, 10L * 1000 * 1000);
+
+ Configuration conf = job.getConfiguration();
+ conf.setLong(NUMROWS, Long.parseLong(args[0]));
+ conf.setInt("cloudgen.minkeylength", Integer.parseInt(args[1]));
+ conf.setInt("cloudgen.maxkeylength", Integer.parseInt(args[2]));
+ conf.setInt("cloudgen.minvaluelength", Integer.parseInt(args[3]));
+ conf.setInt("cloudgen.maxvaluelength", Integer.parseInt(args[4]));
+ conf.set("cloudgen.tablename", args[5]);
+
+ if (args.length > 10)
+ conf.setInt(NUMSPLITS, Integer.parseInt(args[10]));
+
+ job.waitForCompletion(true);
+ return job.isSuccessful() ? 0 : 1;
+ }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.Parser;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A simple map reduce job that inserts word counts into accumulo. See the README for instructions on how to run this.
+ *
+ */
+public class WordCount extends Configured implements Tool {
+  // Usage line handed to the help formatter by printUsage.
+  private static String USAGE = "wordCount <instance name> <zoo keepers> <input dir> <output table>";
+  // Optional -u/--username and -p/--password switches; both take an argument.
+  private static Option usernameOpt = new Option("u", "username", true, "username");
+  private static Option passwordOpt = new Option("p", "password", true, "password");
+  private static Options opts = new Options();
+
+  static {
+    // register the two credential options with the parser's option set
+    opts.addOption(usernameOpt);
+    opts.addOption(passwordOpt);
+  }
+
+  /**
+   * Splits each input line on whitespace and writes one mutation per word: row = the word, column family "count", column qualifier "20080906", value "1".
+   * The mutation is written with a null key.
+   */
+  public static class MapClass extends Mapper<LongWritable,Text,Text,Mutation> {
+    /**
+     * @param key
+     *          unused
+     * @param value
+     *          one line of input text
+     * @param output
+     *          the context the mutations are written to
+     * @throws IOException
+     *           if a write fails, or if the thread is interrupted while writing
+     */
+    @Override
+    public void map(LongWritable key, Text value, Context output) throws IOException {
+      String[] words = value.toString().split("\\s+");
+
+      for (String word : words) {
+        // a blank line, or a line with leading whitespace, yields an empty token; it is not a word
+        if (word.length() == 0)
+          continue;
+
+        Mutation mutation = new Mutation(new Text(word));
+        mutation.put(new Text("count"), new Text("20080906"), new Value("1".getBytes()));
+
+        try {
+          output.write(null, mutation);
+        } catch (InterruptedException e) {
+          // restore the interrupt flag and stop instead of silently carrying on with the remaining words
+          Thread.currentThread().interrupt();
+          throw new IOException("Interrupted while writing mutation", e);
+        }
+      }
+    }
+  }
+
+ public int run(String[] unprocessed_args) throws Exception {
+ Parser p = new BasicParser();
+
+ CommandLine cl = p.parse(opts, unprocessed_args);
+ String[] args = cl.getArgs();
+
+ String username = cl.getOptionValue(usernameOpt.getOpt(), "root");
+ String password = cl.getOptionValue(passwordOpt.getOpt(), "secret");
+
+ if (args.length != 4) {
+ System.out.println("ERROR: Wrong number of parameters: " + args.length + " instead of 4.");
+ return printUsage();
+ }
+
+ Job job = new Job(getConf(), WordCount.class.getName());
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormatClass(TextInputFormat.class);
+ TextInputFormat.setInputPaths(job, new Path(args[2]));
+
+ job.setMapperClass(MapClass.class);
+
+ job.setNumReduceTasks(0);
+
+ job.setOutputFormatClass(AccumuloOutputFormat.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(Mutation.class);
+ AccumuloOutputFormat.setOutputInfo(job, username, password.getBytes(), true, args[3]);
+ AccumuloOutputFormat.setZooKeeperInstance(job, args[0], args[1]);
+ job.waitForCompletion(true);
+ return 0;
+ }
+
+  /**
+   * Print the usage line and the supported options through a HelpFormatter.
+   *
+   * @return always 0
+   */
+  private int printUsage() {
+    new HelpFormatter().printHelp(USAGE, opts);
+    return 0;
+  }
+
+  /**
+   * Runs the WordCount tool through ToolRunner and exits the JVM with the status it returns.
+   */
+  public static void main(String[] args) throws Exception {
+    final Tool wordCount = new WordCount();
+    final int exitCode = ToolRunner.run(CachedConfiguration.getInstance(), wordCount, args);
+    System.exit(exitCode);
+  }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce.bulk;
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Collection;
+
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
+import org.apache.accumulo.core.client.mapreduce.lib.partition.RangePartitioner;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.TextUtil;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Example map reduce job that bulk ingests data into an accumulo table. The expected input is text files containing tab separated key value pairs on each line.
+ */
+public class BulkIngestExample extends Configured implements Tool {
+  /**
+   * Splits each input line at its first tab and emits the text before the tab as the key and the text after it as the value. Lines without a tab, and lines
+   * whose first byte is a tab, produce no output.
+   */
+  public static class MapClass extends Mapper<LongWritable,Text,Text,Text> {
+    private Text outputKey = new Text();
+    private Text outputValue = new Text();
+
+    @Override
+    public void map(LongWritable key, Text value, Context output) throws IOException, InterruptedException {
+      final byte[] line = value.getBytes();
+      final int length = value.getLength();
+
+      // scan for the first tab; tabAt == length means the line has none
+      int tabAt = 0;
+      while (tabAt < length && line[tabAt] != '\t') {
+        tabAt++;
+      }
+
+      // nothing to emit without a tab, or when the key part would be empty
+      if (tabAt == length || tabAt == 0) {
+        return;
+      }
+
+      final int valueStart = tabAt + 1;
+      outputKey.set(line, 0, tabAt);
+      outputValue.set(line, valueStart, length - valueStart);
+      output.write(outputKey, outputValue);
+    }
+  }
+
+  /**
+   * Emits one accumulo Key/Value pair per input value: the row is the reduce key, the column family is "foo", and the column qualifier is the position of
+   * the value within the group, counted from 0.
+   */
+  public static class ReduceClass extends Reducer<Text,Text,Key,Value> {
+    public void reduce(Text key, Iterable<Text> values, Context output) throws IOException, InterruptedException {
+      // be careful with the timestamp... if you run on a cluster
+      // where the time is whacked you may not see your updates in
+      // accumulo if there is already an existing value with a later
+      // timestamp in accumulo... so make sure ntp is running on the
+      // cluster or consider using logical time... one options is
+      // to let accumulo set the time
+      final long now = System.currentTimeMillis();
+
+      int position = 0;
+      for (Text current : values) {
+        final Text qualifier = new Text("" + position);
+        final Key outKey = new Key(key, new Text("foo"), qualifier, now);
+        position += 1;
+
+        // copy only the valid part of the Text's backing array
+        final Value outValue = new Value(current.getBytes(), 0, current.getLength());
+        output.write(outKey, outValue);
+      }
+    }
+  }
+
+ public int run(String[] args) {
+ if (args.length != 7) {
+ System.out.println("ERROR: Wrong number of parameters: " + args.length + " instead of 7.");
+ return printUsage();
+ }
+
+ Configuration conf = getConf();
+ PrintStream out = null;
+ try {
+ Job job = new Job(conf, "bulk ingest example");
+ job.setJarByClass(this.getClass());
+
+ job.setInputFormatClass(TextInputFormat.class);
+
+ job.setMapperClass(MapClass.class);
+ job.setMapOutputKeyClass(Text.class);
+ job.setMapOutputValueClass(Text.class);
+
+ job.setReducerClass(ReduceClass.class);
+ job.setOutputFormatClass(AccumuloFileOutputFormat.class);
+
+ Instance instance = new ZooKeeperInstance(args[0], args[1]);
+ String user = args[2];
+ byte[] pass = args[3].getBytes();
+ String tableName = args[4];
+ String inputDir = args[5];
+ String workDir = args[6];
+
+ Connector connector = instance.getConnector(user, pass);
+
+ TextInputFormat.setInputPaths(job, new Path(inputDir));
+ AccumuloFileOutputFormat.setOutputPath(job, new Path(workDir + "/files"));
+
+ FileSystem fs = FileSystem.get(conf);
+ out = new PrintStream(new BufferedOutputStream(fs.create(new Path(workDir + "/splits.txt"))));
+
+ Collection<Text> splits = connector.tableOperations().getSplits(tableName, 100);
+ for (Text split : splits)
+ out.println(new String(Base64.encodeBase64(TextUtil.getBytes(split))));
+
+ job.setNumReduceTasks(splits.size() + 1);
+ out.close();
+
+ job.setPartitionerClass(RangePartitioner.class);
+ RangePartitioner.setSplitFile(job, workDir + "/splits.txt");
+
+ job.waitForCompletion(true);
+ Path failures = new Path(workDir, "failures");
+ fs.delete(failures, true);
+ fs.mkdirs(new Path(workDir, "failures"));
+ connector.tableOperations().importDirectory(tableName, workDir + "/files", workDir + "/failures", false);
+
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ } finally {
+ if (out != null)
+ out.close();
+ }
+
+ return 0;
+ }
+
+ private int printUsage() {
+ System.out.println("accumulo " + this.getClass().getName() + " <instanceName> <zooKeepers> <username> <password> <table> <input dir> <work dir> ");
+ return 0;
+ }
+
+  /**
+   * Runs the BulkIngestExample tool through ToolRunner and exits the JVM with the status it returns.
+   */
+  public static void main(String[] args) throws Exception {
+    final Configuration conf = CachedConfiguration.getInstance();
+    final int status = ToolRunner.run(conf, new BulkIngestExample(), args);
+    System.exit(status);
+  }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce.bulk;
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Writes tab separated test rows of the form row_NNNNNNNN&lt;tab&gt;value_NNNNNNNN, one per line, to a file on the configured file system.
+ */
+public class GenerateTestData {
+
+  /**
+   * @param args
+   *          startRow numRows outputFile
+   * @throws IOException
+   *           if the output file cannot be created or written
+   */
+  public static void main(String[] args) throws IOException {
+    int startRow = Integer.parseInt(args[0]);
+    int numRows = Integer.parseInt(args[1]);
+    String outputFile = args[2];
+
+    Configuration conf = CachedConfiguration.getInstance();
+    FileSystem fs = FileSystem.get(conf);
+
+    PrintStream out = new PrintStream(new BufferedOutputStream(fs.create(new Path(outputFile))));
+    try {
+      for (int i = 0; i < numRows; i++) {
+        out.println(String.format("row_%08d\tvalue_%08d", i + startRow, i + startRow));
+      }
+
+      // PrintStream never throws on a failed write, so surface the failure explicitly
+      if (out.checkError())
+        throw new IOException("Failed to write " + outputFile);
+    } finally {
+      // release the stream even when writing fails part way through
+      out.close();
+    }
+  }
+
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce.bulk;
+
+import java.util.TreeSet;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Creates an accumulo table and, when split points are given on the command line, adds them to the new table.
+ */
+public class SetupTable {
+
+  /**
+   * @param args
+   *          instance zookeepers username password tableName [splitPoint ...]
+   */
+  public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, TableExistsException {
+    // validate before connecting: the first five arguments are all required
+    if (args.length < 5) {
+      System.err.println("Usage : SetupTable <instance> <zookeepers> <username> <password> <table name> [<split point>{ <split point>}]");
+      return;
+    }
+
+    Connector conn = new ZooKeeperInstance(args[0], args[1]).getConnector(args[2], args[3].getBytes());
+    String tableName = args[4];
+
+    // create a basic table
+    conn.tableOperations().create(tableName);
+
+    if (args.length > 5) {
+      // every remaining argument is an initial partition (split point)
+      TreeSet<Text> initialPartitions = new TreeSet<Text>();
+      for (int i = 5; i < args.length; ++i)
+        initialPartitions.add(new Text(args[i]));
+
+      try {
+        conn.tableOperations().addSplits(tableName, initialPartitions);
+      } catch (TableNotFoundException e) {
+        // unlikely
+        throw new RuntimeException(e);
+      }
+    }
+  }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce.bulk;
+
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.Text;
+import org.apache.log4j.Logger;
+
+public class VerifyIngest {
+ private static final Logger log = Logger.getLogger(VerifyIngest.class);
+
+ public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
+ if (args.length != 7) {
+ System.err.println("VerifyIngest <instance name> <zoo keepers> <username> <password> <table> <startRow> <numRows> ");
+ return;
+ }
+
+ String instanceName = args[0];
+ String zooKeepers = args[1];
+ String user = args[2];
+ byte[] pass = args[3].getBytes();
+ String table = args[4];
+
+ int startRow = Integer.parseInt(args[5]);
+ int numRows = Integer.parseInt(args[6]);
+
+ Instance instance = new ZooKeeperInstance(instanceName, zooKeepers);
+ Connector connector = instance.getConnector(user, pass);
+ Scanner scanner = connector.createScanner(table, Constants.NO_AUTHS);
+
+ scanner.setRange(new Range(new Text(String.format("row_%08d", startRow)), null));
+
+ Iterator<Entry<Key,Value>> si = scanner.iterator();
+
+ boolean ok = true;
+
+ for (int i = startRow; i < numRows; i++) {
+
+ if (si.hasNext()) {
+ Entry<Key,Value> entry = si.next();
+
+ if (!entry.getKey().getRow().toString().equals(String.format("row_%08d", i))) {
+ log.error("unexpected row key " + entry.getKey().getRow().toString() + " expected " + String.format("row_%08d", i));
+ ok = false;
+ }
+
+ if (!entry.getValue().toString().equals(String.format("value_%08d", i))) {
+ log.error("unexpected value " + entry.getValue().toString() + " expected " + String.format("value_%08d", i));
+ ok = false;
+ }
+
+ } else {
+ log.error("no more rows, expected " + String.format("row_%08d", i));
+ ok = false;
+ break;
+ }
+
+ }
+
+ if (ok)
+ System.out.println("OK");
+
+ System.exit(ok ? 0 : 1);
+ }
+
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.shard;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Map.Entry;
+import java.util.Random;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.BatchScanner;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.IntersectingIterator;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Using the doc2word table created by Reverse.java, this program randomly selects N words per document. Then it continually queries a random set of words in
+ * the shard table (created by Index.java) using the intersecting iterator.
+ *
+ */
+
+public class ContinuousQuery {
+  /**
+   * Picks a random group of terms for each document in the doc2word table, then repeatedly runs an intersecting-iterator query for one randomly chosen group
+   * against the shard table, printing the terms, the number of entries returned and the elapsed seconds for every query.
+   *
+   * @param args
+   *          instance, zoo keepers, shard table, doc2word table, user, pass, number of query terms and, optionally, the number of iterations (defaults to
+   *          {@code Long.MAX_VALUE})
+   * @throws Exception
+   *           propagated from argument parsing and the Accumulo client calls
+   */
+  public static void main(String[] args) throws Exception {
+
+    if (args.length != 7 && args.length != 8) {
+      System.err.println("Usage : " + ContinuousQuery.class.getName()
+          + " <instance> <zoo keepers> <shard table> <doc2word table> <user> <pass> <num query terms> [iterations]");
+      System.exit(-1);
+    }
+
+    String instance = args[0];
+    String zooKeepers = args[1];
+    String table = args[2];
+    String docTable = args[3];
+    String user = args[4];
+    String pass = args[5];
+    int numTerms = Integer.parseInt(args[6]);
+    long iterations = Long.MAX_VALUE;
+    if (args.length > 7)
+      iterations = Long.parseLong(args[7]);
+
+    // a negative count would fail later with NegativeArraySizeException and zero terms gives nothing to intersect
+    if (numTerms < 1) {
+      System.err.println("<num query terms> must be at least 1, was " + numTerms);
+      System.exit(-1);
+    }
+
+    ZooKeeperInstance zki = new ZooKeeperInstance(instance, zooKeepers);
+    Connector conn = zki.getConnector(user, pass.getBytes());
+
+    ArrayList<Text[]> randTerms = findRandomTerms(conn.createScanner(docTable, Constants.NO_AUTHS), numTerms);
+
+    // Random.nextInt(0) throws IllegalArgumentException, so report the real problem when there is nothing to query
+    if (randTerms.isEmpty()) {
+      System.err.println("No document in " + docTable + " has at least " + numTerms + " terms, nothing to query");
+      System.exit(-1);
+    }
+
+    Random rand = new Random();
+
+    BatchScanner bs = conn.createBatchScanner(table, Constants.NO_AUTHS, 20);
+
+    try {
+      for (long i = 0; i < iterations; i += 1) {
+        Text[] columns = randTerms.get(rand.nextInt(randTerms.size()));
+
+        // the scanner is reused, so drop the configuration of the previous query
+        bs.clearScanIterators();
+        bs.clearColumns();
+
+        IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
+        IntersectingIterator.setColumnFamilies(ii, columns);
+        bs.addScanIterator(ii);
+        bs.setRanges(Collections.singleton(new Range()));
+
+        long t1 = System.currentTimeMillis();
+        int count = 0;
+        for (@SuppressWarnings("unused")
+        Entry<Key,Value> entry : bs) {
+          count++;
+        }
+        long t2 = System.currentTimeMillis();
+
+        System.out.printf(" %s %,d %6.3f\n", Arrays.asList(columns), count, (t2 - t1) / 1000.0);
+      }
+    } finally {
+      // release the batch scanner even when a query fails
+      bs.close();
+    }
+
+  }
+
+  /**
+   * Reads the scanner row by row, collecting the column families of each row, and selects {@code numTerms} random ones per row.
+   *
+   * @param scanner
+   *          source of entries; entries of the same row are expected to be adjacent
+   * @param numTerms
+   *          number of terms to select for each row
+   * @return one array of {@code numTerms} terms for every row that has at least that many entries
+   */
+  private static ArrayList<Text[]> findRandomTerms(Scanner scanner, int numTerms) {
+
+    Text currentRow = null;
+
+    ArrayList<Text> words = new ArrayList<Text>();
+    ArrayList<Text[]> ret = new ArrayList<Text[]>();
+
+    Random rand = new Random();
+
+    for (Entry<Key,Value> entry : scanner) {
+      Key key = entry.getKey();
+
+      if (currentRow == null)
+        currentRow = key.getRow();
+
+      // a new row starts: finish the previous one before collecting further
+      if (!currentRow.equals(key.getRow())) {
+        selectRandomWords(words, ret, rand, numTerms);
+        words.clear();
+        currentRow = key.getRow();
+      }
+
+      words.add(key.getColumnFamily());
+
+    }
+
+    // the last row is not followed by a row change, so handle it here
+    selectRandomWords(words, ret, rand, numTerms);
+
+    return ret;
+  }
+
+  /**
+   * Shuffles {@code words} in place and appends its first {@code numTerms} elements to {@code ret} as one array; does nothing when fewer than
+   * {@code numTerms} words are available.
+   */
+  private static void selectRandomWords(ArrayList<Text> words, ArrayList<Text[]> ret, Random rand, int numTerms) {
+    if (words.size() >= numTerms) {
+      Collections.shuffle(words, rand);
+      Text docWords[] = new Text[numTerms];
+      for (int i = 0; i < docWords.length; i++) {
+        docWords[i] = words.get(i);
+      }
+
+      ret.add(docWords);
+    }
+  }
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.shard;
+
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashSet;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.Text;
+
+/**
+ * This program indexes a set of documents given on the command line into a shard table.
+ *
+ * What it writes to the table is: row = partition id, column family = term, column qualifier = document id.
+ */
+
+public class Index {
+
+  /**
+   * Formats the absolute value of {@code partition} as an eight digit, zero padded hexadecimal row id.
+   */
+  static Text genPartition(int partition) {
+    return new Text(String.format("%08x", Math.abs(partition)));
+  }
+
+  /**
+   * Adds one mutation for a document to the batch writer: the row is the partition derived from the hash code of the document text, and every distinct lower
+   * cased token becomes a column family with the document id as column qualifier and an empty value.
+   *
+   * @param numPartitions
+   *          number of partitions the documents are spread over; must not be zero
+   * @param docId
+   *          identifier stored as column qualifier
+   * @param doc
+   *          text of the document
+   * @param splitRegex
+   *          regular expression used to split the text into tokens
+   * @param bw
+   *          writer receiving the mutation
+   * @throws Exception
+   *           propagated from the batch writer
+   */
+  public static void index(int numPartitions, Text docId, String doc, String splitRegex, BatchWriter bw) throws Exception {
+
+    String[] tokens = doc.split(splitRegex);
+
+    Text partition = genPartition(doc.hashCode() % numPartitions);
+
+    Mutation m = new Mutation(partition);
+
+    HashSet<String> tokensSeen = new HashSet<String>();
+
+    for (String token : tokens) {
+      // String.split yields an empty first element when the text starts with a delimiter (and for empty text);
+      // an empty string is not a term worth indexing
+      if (token.length() == 0)
+        continue;
+
+      token = token.toLowerCase();
+
+      // add() reports whether the token is new, so each term is written only once per document
+      if (tokensSeen.add(token))
+        m.put(new Text(token), docId, new Value(new byte[0]));
+    }
+
+    if (m.size() > 0)
+      bw.addMutation(m);
+  }
+
+  /**
+   * Indexes a file using its absolute path as document id, or recursively every file below a directory.
+   */
+  private static void index(int numPartitions, File src, String splitRegex, BatchWriter bw) throws Exception {
+
+    if (src.isDirectory()) {
+      File[] children = src.listFiles();
+      // listFiles() returns null when the directory cannot be read
+      if (children == null)
+        throw new java.io.IOException("Unable to list directory " + src.getAbsolutePath());
+
+      for (File child : children) {
+        index(numPartitions, child, splitRegex, bw);
+      }
+    } else {
+      StringBuilder sb = new StringBuilder();
+
+      FileReader fr = new FileReader(src);
+      try {
+        char data[] = new char[4096];
+        int len;
+        while ((len = fr.read(data)) != -1) {
+          sb.append(data, 0, len);
+        }
+      } finally {
+        // do not leak the file handle when reading fails
+        fr.close();
+      }
+
+      index(numPartitions, new Text(src.getAbsolutePath()), sb.toString(), splitRegex, bw);
+    }
+
+  }
+
+  /**
+   * Connects to the instance and creates a batch writer for the given table.
+   */
+  private static BatchWriter setupBatchWriter(String instance, String zooKeepers, String table, String user, String pass) throws Exception {
+    ZooKeeperInstance zinstance = new ZooKeeperInstance(instance, zooKeepers);
+    Connector conn = zinstance.getConnector(user, pass.getBytes());
+    return conn.createBatchWriter(table, 50000000, 300000L, 4);
+  }
+
+  /**
+   * @param args
+   *          instance, zoo keepers, table, user, pass, number of partitions, followed by one or more files or directories to index
+   * @throws Exception
+   *           propagated from argument parsing, file access and the Accumulo client calls
+   */
+  public static void main(String[] args) throws Exception {
+
+    if (args.length < 7) {
+      System.err.println("Usage : " + Index.class.getName() + " <instance> <zoo keepers> <table> <user> <pass> <num partitions> <file>{ <file>}");
+      System.exit(-1);
+    }
+
+    String instance = args[0];
+    String zooKeepers = args[1];
+    String table = args[2];
+    String user = args[3];
+    String pass = args[4];
+
+    int numPartitions = Integer.parseInt(args[5]);
+
+    // the partition is computed with a modulo, which fails with ArithmeticException for zero
+    if (numPartitions < 1) {
+      System.err.println("<num partitions> must be at least 1, was " + numPartitions);
+      System.exit(-1);
+    }
+
+    String splitRegex = "\\W+";
+
+    BatchWriter bw = setupBatchWriter(instance, zooKeepers, table, user, pass);
+
+    try {
+      for (int i = 6; i < args.length; i++) {
+        index(numPartitions, new File(args[i]), splitRegex, bw);
+      }
+    } finally {
+      // close the writer even when indexing one of the files fails
+      bw.close();
+    }
+
+  }
+
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.shard;
+
+import java.util.Collections;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.BatchScanner;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.IntersectingIterator;
+import org.apache.hadoop.io.Text;
+
+/**
+ * This program queries a set of terms in the shard table (populated by Index.java) using the intersecting iterator.
+ *
+ */
+
+public class Query {
+
+  /**
+   * Runs one intersecting-iterator query for the given terms over the whole shard table and prints the column qualifier of every entry returned.
+   *
+   * @param args
+   *          instance, zoo keepers, table, user, pass, followed by one or more terms
+   * @throws Exception
+   *           propagated from the Accumulo client calls
+   */
+  public static void main(String[] args) throws Exception {
+
+    if (args.length < 6) {
+      System.err.println("Usage : " + Query.class.getName() + " <instance> <zoo keepers> <table> <user> <pass> <term>{ <term>}");
+      System.exit(-1);
+    }
+
+    String instance = args[0];
+    String zooKeepers = args[1];
+    String table = args[2];
+    String user = args[3];
+    String pass = args[4];
+
+    ZooKeeperInstance zki = new ZooKeeperInstance(instance, zooKeepers);
+    Connector conn = zki.getConnector(user, pass.getBytes());
+
+    // every argument after the password is a term to intersect
+    Text columns[] = new Text[args.length - 5];
+    for (int i = 5; i < args.length; i++) {
+      columns[i - 5] = new Text(args[i]);
+    }
+
+    BatchScanner bs = conn.createBatchScanner(table, Constants.NO_AUTHS, 20);
+
+    try {
+      IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
+      IntersectingIterator.setColumnFamilies(ii, columns);
+      bs.addScanIterator(ii);
+      bs.setRanges(Collections.singleton(new Range()));
+      for (Entry<Key,Value> entry : bs) {
+        System.out.println(" " + entry.getKey().getColumnQualifier());
+      }
+    } finally {
+      // release the batch scanner, as ContinuousQuery does
+      bs.close();
+    }
+
+  }
+
+}
Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
------------------------------------------------------------------------------
svn:eol-style = native