You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by el...@apache.org on 2013/10/19 01:52:02 UTC

[1/6] git commit: ACCUMULO-1783 Update accumulo-core from 1.4.0 to 1.4.4

Updated Branches:
  refs/heads/1.4 [created] c25f26c7a


ACCUMULO-1783 Update accumulo-core from 1.4.0 to 1.4.4


Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/bed61449
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/bed61449
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/bed61449

Branch: refs/heads/1.4
Commit: bed61449ca40145bdb45ae66f8f3f47e7b042fc0
Parents: 8d3ea53
Author: Josh Elser <el...@apache.org>
Authored: Thu Oct 17 15:30:44 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Thu Oct 17 15:30:44 2013 -0400

----------------------------------------------------------------------
 pom.xml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/bed61449/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 0606ec4..9b910a8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,23 +26,20 @@
       <artifactId>pig</artifactId>
       <version>0.9.2</version>
     </dependency>
-    
     <dependency>
       <groupId>commons-logging</groupId>
       <artifactId>commons-logging</artifactId>
       <version>1.0.4</version>
     </dependency>
-    
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-core</artifactId>
       <version>0.20.2</version>
     </dependency>
-    
     <dependency>
       <groupId>org.apache.accumulo</groupId>
       <artifactId>accumulo-core</artifactId>
-      <version>1.4.0</version>
+      <version>1.4.4</version>
     </dependency>
   </dependencies>
   


[4/6] git commit: ACCUMULO-1783 Remove compiler warnings and add in joda-time.

Posted by el...@apache.org.
ACCUMULO-1783 Remove compiler warnings and add in joda-time.

Pig 0.12.0 needs the joda-time dependency to run.


Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/e1af2f3c
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/e1af2f3c
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/e1af2f3c

Branch: refs/heads/1.4
Commit: e1af2f3cae1c4980b70d5f00385d1585cf958678
Parents: 3ff7ba1
Author: Josh Elser <el...@apache.org>
Authored: Thu Oct 17 15:52:18 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Thu Oct 17 15:52:18 2013 -0400

----------------------------------------------------------------------
 pom.xml                                                     | 9 +++++++--
 .../org/apache/accumulo/pig/AbstractAccumuloStorage.java    | 4 ++++
 2 files changed, 11 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/e1af2f3c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 14c16eb..249dcce 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,4 +1,4 @@
-<!--
+  <!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
@@ -18,7 +18,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.apache.accumulo</groupId>
   <artifactId>accumulo-pig</artifactId>
-  <version>1.4.0-SNAPSHOT</version>
+  <version>1.4.4-SNAPSHOT</version>
   
   <dependencies>
     <dependency>
@@ -41,6 +41,11 @@
       <artifactId>accumulo-core</artifactId>
       <version>1.4.4</version>
     </dependency>
+    <dependency>
+      <groupId>joda-time</groupId>
+      <artifactId>joda-time</artifactId>
+      <version>1.6</version>
+    </dependency>
   </dependencies>
   
 </project>

http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/e1af2f3c/src/main/java/org/apache/accumulo/pig/AbstractAccumuloStorage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/pig/AbstractAccumuloStorage.java b/src/main/java/org/apache/accumulo/pig/AbstractAccumuloStorage.java
index 8577f62..d26cf40 100644
--- a/src/main/java/org/apache/accumulo/pig/AbstractAccumuloStorage.java
+++ b/src/main/java/org/apache/accumulo/pig/AbstractAccumuloStorage.java
@@ -101,11 +101,13 @@ public abstract class AbstractAccumuloStorage extends LoadFunc implements StoreF
   protected abstract Tuple getTuple(Key key, Value value) throws IOException;
   
   @Override
+  @SuppressWarnings("rawtypes")
   public InputFormat getInputFormat() {
     return new AccumuloInputFormat();
   }
   
   @Override
+  @SuppressWarnings({"unchecked", "rawtypes"})
   public void prepareToRead(RecordReader reader, PigSplit split) {
     this.reader = reader;
   }
@@ -237,6 +239,7 @@ public abstract class AbstractAccumuloStorage extends LoadFunc implements StoreF
     }
   }
   
+  @SuppressWarnings("rawtypes")
   public OutputFormat getOutputFormat() {
     return new AccumuloOutputFormat();
   }
@@ -245,6 +248,7 @@ public abstract class AbstractAccumuloStorage extends LoadFunc implements StoreF
     // we don't care about types, they all get casted to ByteBuffers
   }
   
+  @SuppressWarnings({"rawtypes", "unchecked"})
   public void prepareToWrite(RecordWriter writer) {
     this.writer = writer;
   }


[3/6] git commit: ACCUMULO-1783 Lift hadoop and pig dependencies to most recent stable

Posted by el...@apache.org.
ACCUMULO-1783 Lift hadoop and pig dependencies to most recent stable


Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/3ff7ba16
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/3ff7ba16
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/3ff7ba16

Branch: refs/heads/1.4
Commit: 3ff7ba16864716ce9aa0aad953fa9a9fbe437c7f
Parents: 0965e28
Author: Josh Elser <el...@apache.org>
Authored: Thu Oct 17 15:36:44 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Thu Oct 17 15:36:44 2013 -0400

----------------------------------------------------------------------
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/3ff7ba16/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 9b910a8..14c16eb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -24,7 +24,7 @@
     <dependency>
       <groupId>org.apache.pig</groupId>
       <artifactId>pig</artifactId>
-      <version>0.9.2</version>
+      <version>0.12.0</version>
     </dependency>
     <dependency>
       <groupId>commons-logging</groupId>
@@ -34,7 +34,7 @@
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-core</artifactId>
-      <version>0.20.2</version>
+      <version>1.2.1</version>
     </dependency>
     <dependency>
       <groupId>org.apache.accumulo</groupId>


[2/6] git commit: ACCUMULO-1783 Adding in gitignore

Posted by el...@apache.org.
ACCUMULO-1783 Adding in gitignore


Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/0965e287
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/0965e287
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/0965e287

Branch: refs/heads/1.4
Commit: 0965e287741f964302672e3f2f4343f3c72bde82
Parents: bed6144
Author: Josh Elser <el...@apache.org>
Authored: Thu Oct 17 15:31:26 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Thu Oct 17 15:31:26 2013 -0400

----------------------------------------------------------------------
 .gitignore | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/0965e287/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f0e4507
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.classpath
+.project
+.settings
+/target


[5/6] git commit: ACCUMULO-1783 A simple little wrapper script for some convenience setting up PIG_CLASSPATH all the time.

Posted by el...@apache.org.
ACCUMULO-1783 A simple little wrapper script for some convenience
setting up PIG_CLASSPATH all the time.


Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/d75f91c4
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/d75f91c4
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/d75f91c4

Branch: refs/heads/1.4
Commit: d75f91c478fb724ef804958f9239d170e1f68bd6
Parents: e1af2f3
Author: Josh Elser <el...@apache.org>
Authored: Fri Oct 18 19:49:09 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Fri Oct 18 19:49:09 2013 -0400

----------------------------------------------------------------------
 accumulo-pig | 7 +++++++
 1 file changed, 7 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/d75f91c4/accumulo-pig
----------------------------------------------------------------------
diff --git a/accumulo-pig b/accumulo-pig
new file mode 100755
index 0000000..7bf96e6
--- /dev/null
+++ b/accumulo-pig
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+export PIG_CLASSPATH=./target/accumulo-pig-1.4.4-SNAPSHOT.jar:$ACCUMULO_HOME/lib/accumulo-core-1.4.5-SNAPSHOT.jar:$ACCUMULO_HOME/lib/libthrift-0.6.1.jar:$ACCUMULO_HOME/lib/cloudtrace-1.4.5-SNAPSHOT.jar
+
+pig -v -x mapreduce 
+
+# vim: ft=sh


[6/6] git commit: ACCUMULO-1783 Building a "better" typed AccumuloStorage.

Posted by el...@apache.org.
ACCUMULO-1783 Building a "better" typed AccumuloStorage.

Took some hints from the AccumuloStorage and HBaseStorage classes on how
to generalize it. (Should) provide serialization of any type
into/out-of accumulo


Project: http://git-wip-us.apache.org/repos/asf/accumulo-pig/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-pig/commit/c25f26c7
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-pig/tree/c25f26c7
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-pig/diff/c25f26c7

Branch: refs/heads/1.4
Commit: c25f26c7a1ebf38c80481d4ccc8b19603ca634dd
Parents: d75f91c
Author: Josh Elser <el...@apache.org>
Authored: Fri Oct 18 19:50:09 2013 -0400
Committer: Josh Elser <el...@apache.org>
Committed: Fri Oct 18 19:50:09 2013 -0400

----------------------------------------------------------------------
 .../accumulo/pig/TypedAccumuloStorage.java      | 207 +++++++++++++++++++
 1 file changed, 207 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-pig/blob/c25f26c7/src/main/java/org/apache/accumulo/pig/TypedAccumuloStorage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/pig/TypedAccumuloStorage.java b/src/main/java/org/apache/accumulo/pig/TypedAccumuloStorage.java
new file mode 100644
index 0000000..30c39c9
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/pig/TypedAccumuloStorage.java
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.accumulo.pig;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.Text;
+import org.apache.pig.LoadStoreCaster;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.LoadPushDown.RequiredFieldList;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.builtin.Utf8StorageConverter;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.util.ObjectSerializer;
+import org.apache.pig.impl.util.UDFContext;
+import org.joda.time.DateTime;
+
+/**
+ * A LoadStoreFunc for retrieving data from and storing data to Accumulo.
+ * 
+ * <p>
+ * On load, a Key/Value pair is returned as a 6-field tuple: (key, colfam, colqual, colvis, timestamp, value). All fields except timestamp are DataByteArray,
+ * timestamp is a Long.
+ * 
+ * <p>
+ * On store, tuples can be written in 2 forms: (key, colfam, colqual, colvis, value) OR (key, colfam, colqual, value). Each field is converted to bytes with the
+ * configured {@link LoadStoreCaster}, using the type from the schema passed to {@link #checkSchema(ResourceSchema)} when one has been set and the runtime type
+ * of the field otherwise.
+ */
+public class TypedAccumuloStorage extends AbstractAccumuloStorage {
+  private static final Log LOG = LogFactory.getLog(TypedAccumuloStorage.class);
+  
+  /** Stand-in payload for a null value field, which has no byte representation of its own. */
+  private static final byte[] EMPTY_BYTES = new byte[0];
+  
+  /** Converts Pig objects to their byte representation when storing. */
+  protected LoadStoreCaster caster;
+  
+  /** Signature handed over by Pig; used to key this instance's UDF properties. */
+  protected String contextSignature = null;
+  
+  /** Schema received in {@link #checkSchema(ResourceSchema)}; null until that method has been called on this instance. */
+  private ResourceSchema schema_;
+  
+  // NOTE(review): not referenced anywhere in this class as shown.
+  private RequiredFieldList requiredFieldList;
+  
+  /**
+   * Creates a storage function that serializes fields with a {@link Utf8StorageConverter}.
+   */
+  public TypedAccumuloStorage() {
+    this.caster = new Utf8StorageConverter();
+  }
+  
+  /**
+   * Wraps an Accumulo entry in a 6-field tuple: (row, colfam, colqual, colvis, timestamp, value).
+   * 
+   * @param key
+   *          the entry's key
+   * @param value
+   *          the entry's value
+   * @return the tuple; every field is a DataByteArray except the timestamp, which is a Long
+   */
+  @Override
+  protected Tuple getTuple(Key key, Value value) throws IOException {
+    Tuple tuple = TupleFactory.getInstance().newTuple(6);
+    tuple.set(0, toDataByteArray(key.getRow()));
+    tuple.set(1, toDataByteArray(key.getColumnFamily()));
+    tuple.set(2, toDataByteArray(key.getColumnQualifier()));
+    tuple.set(3, toDataByteArray(key.getColumnVisibility()));
+    tuple.set(4, Long.valueOf(key.getTimestamp()));
+    tuple.set(5, new DataByteArray(value.get()));
+    return tuple;
+  }
+  
+  /**
+   * Copies the valid portion of a Text into a DataByteArray. Only the first {@code getLength()} bytes of {@code Text.getBytes()} are valid data, so the copy is
+   * bounded explicitly instead of wrapping the whole backing array.
+   */
+  private static DataByteArray toDataByteArray(Text text) {
+    return new DataByteArray(text.getBytes(), 0, text.getLength());
+  }
+  
+  /**
+   * Converts a tuple into a single Mutation.
+   * 
+   * <p>
+   * Tuples with more than 4 fields are read as (row, colfam, colqual, colvis, value); otherwise as (row, colfam, colqual, value). An empty or null column
+   * visibility results in a put without a visibility. Null column family/qualifier fields are written as empty, and a null value field is written as an empty
+   * value.
+   * 
+   * @param tuple
+   *          the tuple to store
+   * @return a singleton collection holding the Mutation for this tuple
+   * @throws IOException
+   *           if the tuple has fewer than 4 fields, the row field is null, or a field cannot be converted to bytes
+   */
+  @Override
+  public Collection<Mutation> getMutations(Tuple tuple) throws ExecException, IOException {
+    final int size = tuple.size();
+    if (size < 4) {
+      throw new IOException("Expected a tuple of at least 4 fields (key, colfam, colqual, [colvis,] value) but got " + size);
+    }
+    
+    ResourceFieldSchema[] fieldSchemas = (schema_ == null) ? null : schema_.getFields();
+    
+    // A mutation has to be addressed to a row, so fail with a clear message instead of a NullPointerException.
+    byte[] rowBytes = tupleToBytes(tuple, 0, fieldSchemas);
+    if (rowBytes == null) {
+      throw new IOException("Unable to create a Mutation from a null row (tuple field 0)");
+    }
+    
+    Mutation mut = new Mutation(new Text(rowBytes));
+    Text cf = tupleToText(tuple, 1, fieldSchemas);
+    Text cq = tupleToText(tuple, 2, fieldSchemas);
+    
+    if (size > 4) {
+      Text cv = tupleToText(tuple, 3, fieldSchemas);
+      Value val = tupleToValue(tuple, 4, fieldSchemas);
+      
+      if (cv.getLength() == 0) {
+        mut.put(cf, cq, val);
+      } else {
+        mut.put(cf, cq, new ColumnVisibility(cv), val);
+      }
+    } else {
+      mut.put(cf, cq, tupleToValue(tuple, 3, fieldSchemas));
+    }
+    
+    return Collections.singleton(mut);
+  }
+  
+  @Override
+  public void setUDFContextSignature(String signature) {
+    this.contextSignature = signature;
+  }
+  
+  @Override
+  public void setStoreFuncUDFContextSignature(String signature) {
+    this.contextSignature = signature;
+  }
+  
+  /**
+   * Returns UDFProperties based on <code>contextSignature</code>.
+   */
+  private Properties getUDFProperties() {
+    return UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] {contextSignature});
+  }
+  
+  /**
+   * Remembers the schema of the data being stored and records its serialized form in the UDF properties under <code>contextSignature + "_schema"</code>.
+   * 
+   * @param s
+   *          the schema of the data to be stored
+   * @throws IOException
+   *           if no usable caster is configured or the schema cannot be serialized
+   */
+  @Override
+  public void checkSchema(ResourceSchema s) throws IOException {
+    if (!(caster instanceof LoadStoreCaster)) {
+      LOG.error("Caster must implement LoadStoreCaster for writing to Accumulo.");
+      // instanceof is false for null, so guard the getClass() call used in the message.
+      throw new IOException("Bad Caster " + (caster == null ? null : caster.getClass()));
+    }
+    schema_ = s;
+    getUDFProperties().setProperty(contextSignature + "_schema", ObjectSerializer.serialize(schema_));
+  }
+  
+  /**
+   * Serializes field <code>i</code> of the tuple into a Text; a field without a byte representation yields an empty Text.
+   */
+  private Text tupleToText(Tuple tuple, int i, ResourceFieldSchema[] fieldSchemas) throws IOException {
+    Object o = tuple.get(i);
+    byte type = schemaToType(o, i, fieldSchemas);
+    
+    return objToText(o, type);
+  }
+  
+  /**
+   * Serializes field <code>i</code> of the tuple into a Value; a field without a byte representation yields an empty Value.
+   */
+  private Value tupleToValue(Tuple tuple, int i, ResourceFieldSchema[] fieldSchemas) throws IOException {
+    byte[] bytes = tupleToBytes(tuple, i, fieldSchemas);
+    return new Value((bytes == null) ? EMPTY_BYTES : bytes);
+  }
+  
+  /**
+   * Picks the Pig type used to serialize a field: the schema's type for position <code>i</code> when a schema is available, otherwise the type detected from
+   * the object itself.
+   */
+  private byte schemaToType(Object o, int i, ResourceFieldSchema[] fieldSchemas) {
+    return (fieldSchemas == null) ? DataType.findType(o) : fieldSchemas[i].getType();
+  }
+  
+  /**
+   * Serializes field <code>i</code> of the tuple into bytes; returns null when the field is null or of type NULL.
+   */
+  private byte[] tupleToBytes(Tuple tuple, int i, ResourceFieldSchema[] fieldSchemas) throws IOException {
+    Object o = tuple.get(i);
+    byte type = schemaToType(o, i, fieldSchemas);
+    
+    return objToBytes(o, type);
+  }
+  
+  /**
+   * Serializes an object into a Text. A Text cannot be built from a null array, so an object without a byte representation becomes an empty Text.
+   */
+  private Text objToText(Object o, byte type) throws IOException {
+    byte[] bytes = objToBytes(o, type);
+    return (bytes == null) ? new Text() : new Text(bytes);
+  }
+  
+  /**
+   * Converts a Pig object to bytes using the caster method that matches the given Pig type.
+   * 
+   * @param o
+   *          the object to convert; may be null
+   * @param type
+   *          one of the {@link DataType} type constants
+   * @return the serialized bytes, or null if <code>o</code> is null or <code>type</code> is {@link DataType#NULL}
+   * @throws IOException
+   *           if the type is {@link DataType#ERROR} or has no converter
+   */
+  @SuppressWarnings("unchecked")
+  private byte[] objToBytes(Object o, byte type) throws IOException {
+    if (o == null) {
+      return null;
+    }
+    switch (type) {
+      case DataType.BYTEARRAY:
+        return ((DataByteArray) o).get();
+      case DataType.BAG:
+        return caster.toBytes((DataBag) o);
+      case DataType.CHARARRAY:
+        return caster.toBytes((String) o);
+      case DataType.DOUBLE:
+        return caster.toBytes((Double) o);
+      case DataType.FLOAT:
+        return caster.toBytes((Float) o);
+      case DataType.INTEGER:
+        return caster.toBytes((Integer) o);
+      case DataType.LONG:
+        return caster.toBytes((Long) o);
+      case DataType.BIGINTEGER:
+        return caster.toBytes((BigInteger) o);
+      case DataType.BIGDECIMAL:
+        return caster.toBytes((BigDecimal) o);
+      case DataType.BOOLEAN:
+        return caster.toBytes((Boolean) o);
+      case DataType.DATETIME:
+        return caster.toBytes((DateTime) o);
+        
+        // The type conversion here is unchecked.
+        // Relying on DataType.findType to do the right thing.
+      case DataType.MAP:
+        return caster.toBytes((Map<String,Object>) o);
+        
+      case DataType.NULL:
+        return null;
+      case DataType.TUPLE:
+        return caster.toBytes((Tuple) o);
+      case DataType.ERROR:
+        throw new IOException("Unable to determine type of " + o.getClass());
+      default:
+        throw new IOException("Unable to find a converter for tuple field " + o);
+    }
+  }
+  
+}