You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by el...@apache.org on 2013/10/19 01:52:02 UTC
[1/6] git commit: ACCUMULO-1783 Update accumulo-core from 1.4.0 to
1.4.4
Updated Branches:
refs/heads/1.4 [created] c25f26c7a
ACCUMULO-1783 Update accumulo-core from 1.4.0 to 1.4.4
Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/bed61449
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/bed61449
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/bed61449
Branch: refs/heads/1.4
Commit: bed61449ca40145bdb45ae66f8f3f47e7b042fc0
Parents: 8d3ea53
Author: Josh Elser <el...@apache.org>
Authored: Thu Oct 17 15:30:44 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Thu Oct 17 15:30:44 2013 -0400
----------------------------------------------------------------------
pom.xml | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/bed61449/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 0606ec4..9b910a8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,23 +26,20 @@
<artifactId>pig</artifactId>
<version>0.9.2</version>
</dependency>
-
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.0.4</version>
</dependency>
-
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.2</version>
</dependency>
-
<dependency>
<groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-core</artifactId>
- <version>1.4.0</version>
+ <version>1.4.4</version>
</dependency>
</dependencies>
[4/6] git commit: ACCUMULO-1783 Remove compiler warnings and add in
joda-time.
Posted by el...@apache.org.
ACCUMULO-1783 Remove compiler warnings and add in joda-time.
Pig 0.12.0 needs the joda-time dependency to run.
Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/e1af2f3c
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/e1af2f3c
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/e1af2f3c
Branch: refs/heads/1.4
Commit: e1af2f3cae1c4980b70d5f00385d1585cf958678
Parents: 3ff7ba1
Author: Josh Elser <el...@apache.org>
Authored: Thu Oct 17 15:52:18 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Thu Oct 17 15:52:18 2013 -0400
----------------------------------------------------------------------
pom.xml | 9 +++++++--
.../org/apache/accumulo/pig/AbstractAccumuloStorage.java | 4 ++++
2 files changed, 11 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/e1af2f3c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 14c16eb..249dcce 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,4 +1,4 @@
-<!--
+ <!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
@@ -18,7 +18,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-pig</artifactId>
- <version>1.4.0-SNAPSHOT</version>
+ <version>1.4.4-SNAPSHOT</version>
<dependencies>
<dependency>
@@ -41,6 +41,11 @@
<artifactId>accumulo-core</artifactId>
<version>1.4.4</version>
</dependency>
+ <dependency>
+ <groupId>joda-time</groupId>
+ <artifactId>joda-time</artifactId>
+ <version>1.6</version>
+ </dependency>
</dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/e1af2f3c/src/main/java/org/apache/accumulo/pig/AbstractAccumuloStorage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/pig/AbstractAccumuloStorage.java b/src/main/java/org/apache/accumulo/pig/AbstractAccumuloStorage.java
index 8577f62..d26cf40 100644
--- a/src/main/java/org/apache/accumulo/pig/AbstractAccumuloStorage.java
+++ b/src/main/java/org/apache/accumulo/pig/AbstractAccumuloStorage.java
@@ -101,11 +101,13 @@ public abstract class AbstractAccumuloStorage extends LoadFunc implements StoreF
protected abstract Tuple getTuple(Key key, Value value) throws IOException;
@Override
+ @SuppressWarnings("rawtypes")
public InputFormat getInputFormat() {
return new AccumuloInputFormat();
}
@Override
+ @SuppressWarnings({"unchecked", "rawtypes"})
public void prepareToRead(RecordReader reader, PigSplit split) {
this.reader = reader;
}
@@ -237,6 +239,7 @@ public abstract class AbstractAccumuloStorage extends LoadFunc implements StoreF
}
}
+ @SuppressWarnings("rawtypes")
public OutputFormat getOutputFormat() {
return new AccumuloOutputFormat();
}
@@ -245,6 +248,7 @@ public abstract class AbstractAccumuloStorage extends LoadFunc implements StoreF
// we don't care about types, they all get casted to ByteBuffers
}
+ @SuppressWarnings({"rawtypes", "unchecked"})
public void prepareToWrite(RecordWriter writer) {
this.writer = writer;
}
[3/6] git commit: ACCUMULO-1783 Lift hadoop and pig dependencies to
most recent stable
Posted by el...@apache.org.
ACCUMULO-1783 Lift hadoop and pig dependencies to most recent stable
Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/3ff7ba16
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/3ff7ba16
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/3ff7ba16
Branch: refs/heads/1.4
Commit: 3ff7ba16864716ce9aa0aad953fa9a9fbe437c7f
Parents: 0965e28
Author: Josh Elser <el...@apache.org>
Authored: Thu Oct 17 15:36:44 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Thu Oct 17 15:36:44 2013 -0400
----------------------------------------------------------------------
pom.xml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/3ff7ba16/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 9b910a8..14c16eb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -24,7 +24,7 @@
<dependency>
<groupId>org.apache.pig</groupId>
<artifactId>pig</artifactId>
- <version>0.9.2</version>
+ <version>0.12.0</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
@@ -34,7 +34,7 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
- <version>0.20.2</version>
+ <version>1.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.accumulo</groupId>
[2/6] git commit: ACCUMULO-1783 Adding in gitignore
Posted by el...@apache.org.
ACCUMULO-1783 Adding in gitignore
Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/0965e287
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/0965e287
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/0965e287
Branch: refs/heads/1.4
Commit: 0965e287741f964302672e3f2f4343f3c72bde82
Parents: bed6144
Author: Josh Elser <el...@apache.org>
Authored: Thu Oct 17 15:31:26 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Thu Oct 17 15:31:26 2013 -0400
----------------------------------------------------------------------
.gitignore | 4 ++++
1 file changed, 4 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/0965e287/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f0e4507
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.classpath
+.project
+.settings
+/target
[5/6] git commit: ACCUMULO-1783 A simple little wrapper script for
some convenience setting up PIG_CLASSPATH all the time.
Posted by el...@apache.org.
ACCUMULO-1783 A simple little wrapper script for some convenience
setting up PIG_CLASSPATH all the time.
Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/d75f91c4
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/d75f91c4
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/d75f91c4
Branch: refs/heads/1.4
Commit: d75f91c478fb724ef804958f9239d170e1f68bd6
Parents: e1af2f3
Author: Josh Elser <el...@apache.org>
Authored: Fri Oct 18 19:49:09 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Fri Oct 18 19:49:09 2013 -0400
----------------------------------------------------------------------
accumulo-pig | 7 +++++++
1 file changed, 7 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/d75f91c4/accumulo-pig
----------------------------------------------------------------------
diff --git a/accumulo-pig b/accumulo-pig
new file mode 100755
index 0000000..7bf96e6
--- /dev/null
+++ b/accumulo-pig
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+export PIG_CLASSPATH=./target/accumulo-pig-1.4.4-SNAPSHOT.jar:$ACCUMULO_HOME/lib/accumulo-core-1.4.5-SNAPSHOT.jar:$ACCUMULO_HOME/lib/libthrift-0.6.1.jar:$ACCUMULO_HOME/lib/cloudtrace-1.4.5-SNAPSHOT.jar
+
+pig -v -x mapreduce
+
+# vim: ft=sh
[6/6] git commit: ACCUMULO-1783 Building a "better" typed
AccumuloStorage.
Posted by el...@apache.org.
ACCUMULO-1783 Building a "better" typed AccumuloStorage.
Took some hints from the AccumuloStorage and HBaseStorage classes on how
to generalize it. (Should) provide serialization of any type
into/out-of accumulo
Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/c25f26c7
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/c25f26c7
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/c25f26c7
Branch: refs/heads/1.4
Commit: c25f26c7a1ebf38c80481d4ccc8b19603ca634dd
Parents: d75f91c
Author: Josh Elser <el...@apache.org>
Authored: Fri Oct 18 19:50:09 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Fri Oct 18 19:50:09 2013 -0400
----------------------------------------------------------------------
.../accumulo/pig/TypedAccumuloStorage.java | 207 +++++++++++++++++++
1 file changed, 207 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/c25f26c7/src/main/java/org/apache/accumulo/pig/TypedAccumuloStorage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/pig/TypedAccumuloStorage.java b/src/main/java/org/apache/accumulo/pig/TypedAccumuloStorage.java
new file mode 100644
index 0000000..30c39c9
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/pig/TypedAccumuloStorage.java
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.accumulo.pig;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.Text;
+import org.apache.pig.LoadStoreCaster;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.LoadPushDown.RequiredFieldList;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.builtin.Utf8StorageConverter;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.util.ObjectSerializer;
+import org.apache.pig.impl.util.UDFContext;
+import org.joda.time.DateTime;
+
+/**
+ * A LoadStoreFunc for retrieving data from and storing data to Accumulo
+ *
+ * A Key/Val pair will be returned as tuples: (key, colfam, colqual, colvis, timestamp, value). All fields except timestamp are DataByteArray, timestamp is a
+ * long.
+ *
+ * Tuples can be written in 2 forms: (key, colfam, colqual, colvis, value) OR (key, colfam, colqual, value)
+ *
+ */
+public class TypedAccumuloStorage extends AbstractAccumuloStorage {
+ private static final Log LOG = LogFactory.getLog(TypedAccumuloStorage.class);
+ protected LoadStoreCaster caster;
+ protected String contextSignature = null;
+
+ private ResourceSchema schema_;
+ private RequiredFieldList requiredFieldList;
+
+ public TypedAccumuloStorage() {
+ this.caster = new Utf8StorageConverter();
+ }
+
+ @Override
+ protected Tuple getTuple(Key key, Value value) throws IOException {
+ // and wrap it in a tuple
+ Tuple tuple = TupleFactory.getInstance().newTuple(6);
+ tuple.set(0, new DataByteArray(key.getRow().getBytes()));
+ tuple.set(1, new DataByteArray(key.getColumnFamily().getBytes()));
+ tuple.set(2, new DataByteArray(key.getColumnQualifier().getBytes()));
+ tuple.set(3, new DataByteArray(key.getColumnVisibility().getBytes()));
+ tuple.set(4, new Long(key.getTimestamp()));
+ tuple.set(5, new DataByteArray(value.get()));
+ return tuple;
+ }
+
+ @Override
+ public Collection<Mutation> getMutations(Tuple tuple) throws ExecException, IOException {
+ ResourceFieldSchema[] fieldSchemas = (schema_ == null) ? null : schema_.getFields();
+
+ Text t = tupleToText(tuple, 0, fieldSchemas);
+
+ Mutation mut = new Mutation(t);
+ Text cf = tupleToText(tuple, 1, fieldSchemas);
+ Text cq = tupleToText(tuple, 2, fieldSchemas);
+
+ if (tuple.size() > 4) {
+ Text cv = tupleToText(tuple, 3, fieldSchemas);
+
+ byte[] valueBytes = tupleToBytes(tuple, 4, fieldSchemas);
+
+ Value val = new Value(valueBytes);
+ if (cv.getLength() == 0) {
+ mut.put(cf, cq, val);
+ } else {
+ mut.put(cf, cq, new ColumnVisibility(cv), val);
+ }
+ } else {
+ byte[] valueBytes = tupleToBytes(tuple, 3, fieldSchemas);
+ Value val = new Value(valueBytes);
+ mut.put(cf, cq, val);
+ }
+
+ return Collections.singleton(mut);
+ }
+
+ @Override
+ public void setUDFContextSignature(String signature) {
+ this.contextSignature = signature;
+ }
+
+ @Override
+ public void setStoreFuncUDFContextSignature(String signature) {
+ this.contextSignature = signature;
+ }
+
+ /**
+ * Returns UDFProperties based on <code>contextSignature</code>.
+ */
+ private Properties getUDFProperties() {
+ return UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] {contextSignature});
+ }
+
+ @Override
+ public void checkSchema(ResourceSchema s) throws IOException {
+ if (!(caster instanceof LoadStoreCaster)) {
+ LOG.error("Caster must implement LoadStoreCaster for writing to HBase.");
+ throw new IOException("Bad Caster " + caster.getClass());
+ }
+ schema_ = s;
+ getUDFProperties().setProperty(contextSignature + "_schema", ObjectSerializer.serialize(schema_));
+ }
+
+ private Text tupleToText(Tuple tuple, int i, ResourceFieldSchema[] fieldSchemas) throws IOException {
+ Object o = tuple.get(i);
+ byte type = schemaToType(o, i, fieldSchemas);
+
+ return objToText(o, type);
+ }
+
+ private byte schemaToType(Object o, int i, ResourceFieldSchema[] fieldSchemas) {
+ return (fieldSchemas == null) ? DataType.findType(o) : fieldSchemas[i].getType();
+ }
+
+ private byte[] tupleToBytes(Tuple tuple, int i, ResourceFieldSchema[] fieldSchemas) throws IOException {
+ Object o = tuple.get(i);
+ byte type = schemaToType(o, i, fieldSchemas);
+
+ return objToBytes(o, type);
+
+ }
+
+ private Text objToText(Object o, byte type) throws IOException {
+ return new Text(objToBytes(o, type));
+ }
+
+ @SuppressWarnings("unchecked")
+ private byte[] objToBytes(Object o, byte type) throws IOException {
+ if (o == null)
+ return null;
+ switch (type) {
+ case DataType.BYTEARRAY:
+ return ((DataByteArray) o).get();
+ case DataType.BAG:
+ return caster.toBytes((DataBag) o);
+ case DataType.CHARARRAY:
+ return caster.toBytes((String) o);
+ case DataType.DOUBLE:
+ return caster.toBytes((Double) o);
+ case DataType.FLOAT:
+ return caster.toBytes((Float) o);
+ case DataType.INTEGER:
+ return caster.toBytes((Integer) o);
+ case DataType.LONG:
+ return caster.toBytes((Long) o);
+ case DataType.BIGINTEGER:
+ return caster.toBytes((BigInteger) o);
+ case DataType.BIGDECIMAL:
+ return caster.toBytes((BigDecimal) o);
+ case DataType.BOOLEAN:
+ return caster.toBytes((Boolean) o);
+ case DataType.DATETIME:
+ return caster.toBytes((DateTime) o);
+
+ // The type conversion here is unchecked.
+ // Relying on DataType.findType to do the right thing.
+ case DataType.MAP:
+ return caster.toBytes((Map<String,Object>) o);
+
+ case DataType.NULL:
+ return null;
+ case DataType.TUPLE:
+ return caster.toBytes((Tuple) o);
+ case DataType.ERROR:
+ throw new IOException("Unable to determine type of " + o.getClass());
+ default:
+ throw new IOException("Unable to find a converter for tuple field " + o);
+ }
+ }
+
+}