You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2013/09/12 20:32:16 UTC

svn commit: r1522691 - in /pig/trunk: CHANGES.txt build.xml ivy.xml ivy/libraries.properties src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java test/org/apache/pig/test/TestHBaseStorage.java

Author: daijy
Date: Thu Sep 12 18:32:15 2013
New Revision: 1522691

URL: http://svn.apache.org/r1522691
Log:
PIG-3390: Make pig working with HBase 0.95

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/build.xml
    pig/trunk/ivy.xml
    pig/trunk/ivy/libraries.properties
    pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
    pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1522691&r1=1522690&r2=1522691&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Sep 12 18:32:15 2013
@@ -30,6 +30,8 @@ PIG-3174: Remove rpm and deb artifacts f
 
 IMPROVEMENTS
 
+PIG-3390: Make pig working with HBase 0.95 (jarcec via daijy)
+
 PIG-3431: Return more information for parsing related exceptions. (jeremykarn via daijy)
 
 PIG-3430: Add xml format for explaining MapReduce Plan. (jeremykarn via daijy)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1522691&r1=1522690&r2=1522691&view=diff
==============================================================================
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Thu Sep 12 18:32:15 2013
@@ -179,6 +179,17 @@
         <equals arg1="${hadoopversion}" arg2="23"/>
     </condition>
 
+    <!--
+      HBase master version
+      Denotes how the HBase dependencies are laid out. Value "94" denotes the
+      older format where all HBase code is present in one single jar, which is
+      the way HBase is available up to version 0.94. Value "95" denotes the new
+      format where HBase is split into multiple dependencies, one per major
+      subsystem, e.g. "client", "server", etc. Only values "94" and "95" are
+      supported at the moment.
+    -->
+    <property name="hbaseversion" value="94" />
+
     <property name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
     <property name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" />
 

Modified: pig/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1522691&r1=1522690&r2=1522691&view=diff
==============================================================================
--- pig/trunk/ivy.xml (original)
+++ pig/trunk/ivy.xml Thu Sep 12 18:32:15 2013
@@ -31,7 +31,7 @@
     <conf name="default" extends="master,runtime"/>
     <conf name="runtime" extends="compile,test" description="runtime but not the artifact" />
     <!--Private configurations. -->
-    <conf name="compile" extends="hadoop${hadoopversion}" visibility="private" description="compile artifacts"/>
+    <conf name="compile" extends="hadoop${hadoopversion},hbase${hbaseversion}" visibility="private" description="compile artifacts"/>
     <conf name="test" extends="compile" visibility="private"/>
     <conf name="javadoc" visibility="private" extends="compile,test"/>
     <conf name="releaseaudit" visibility="private"/>
@@ -40,6 +40,8 @@
     <conf name="buildJar" extends="compile,test" visibility="private"/>
     <conf name="hadoop20" visibility="private"/>
     <conf name="hadoop23" visibility="private"/>
+    <conf name="hbase94" visibility="private"/>
+    <conf name="hbase95" visibility="private"/>
   </configurations>
   <publications>
     <artifact name="pig" conf="master"/>
@@ -234,7 +236,8 @@
     <dependency org="org.jruby" name="jruby-complete" rev="${jruby.version}" conf="compile->master"/>
     <dependency org="asm" name="asm" rev="${asm.version}" conf="compile->default"/>
 
-    <dependency org="org.apache.hbase" name="hbase" rev="${hbase.version}" conf="compile->master">
+    <!-- HBase dependency in the format used by releases up to and including 0.94 -->
+    <dependency org="org.apache.hbase" name="hbase" rev="${hbase94.version}" conf="hbase94->master">
       <artifact name="hbase" type="jar"/>
       <artifact name="hbase" type="test-jar" ext="jar" m:classifier="tests"/>
       <exclude org="org.apache.thrift" module="thrift"/>
@@ -257,6 +260,85 @@
       <exclude org="asm" module="asm"/>
     </dependency>
 
+    <!-- HBase dependencies in the format used by releases 0.95 and later -->
+    <dependency org="org.apache.hbase" name="hbase-client" rev="${hbase95.version}" conf="hbase95->master">
+      <artifact name="hbase-client" type="jar"/>
+      <artifact name="hbase-client" type="test-jar" ext="jar" m:classifier="tests"/>
+      <exclude org="org.slf4j" module="slf4j-api"/>
+      <exclude org="org.slf4j" module="slf4j-log4j12" />
+      <exclude org="stax" module="stax-api" />
+      <exclude org="javax.xml.bind" module="jaxb-api" />
+      <exclude org="tomcat" module="jasper-runtime"/>
+      <exclude org="tomcat" module="jasper-compiler"/>
+      <exclude org="com.google.protobuf" module="protobuf-java"/>
+      <exclude org="com.sun.jersey" module="jersey-core"/>
+      <exclude org="com.sun.jersey" module="jersey-server"/>
+      <exclude org="com.sun.jersey" module="jersey-json"/>
+      <exclude org="asm" module="asm"/>
+    </dependency>
+
+    <dependency org="org.apache.hbase" name="hbase-common" rev="${hbase95.version}" conf="hbase95->master">
+      <artifact name="hbase-common" type="jar"/>
+      <artifact name="hbase-common" type="test-jar" ext="jar" m:classifier="tests"/>
+      <exclude org="org.apache.hadoop" module="hadoop-core"/>
+      <exclude org="stax" module="stax-api" />
+      <exclude org="javax.xml.bind" module="jaxb-api" />
+      <exclude org="javax.ws.rs" module="jsr311-api" />
+      <exclude org="tomcat" module="jasper-runtime"/>
+      <exclude org="tomcat" module="jasper-compiler"/>
+      <exclude org="com.sun.jersey" module="jersey-core"/>
+      <exclude org="com.sun.jersey" module="jersey-server"/>
+      <exclude org="com.sun.jersey" module="jersey-json"/>
+      <exclude org="asm" module="asm"/>
+    </dependency>
+
+    <dependency org="org.apache.hbase" name="hbase-server" rev="${hbase95.version}" conf="hbase95->master">
+      <artifact name="hbase-server" type="jar"/>
+      <artifact name="hbase-server" type="test-jar" ext="jar" m:classifier="tests"/>
+      <exclude org="org.apache.hadoop" module="hadoop-core"/>
+      <exclude org="org.slf4j" module="slf4j-api"/>
+      <exclude org="org.slf4j" module="slf4j-log4j12" />
+      <exclude org="stax" module="stax-api" />
+      <exclude org="javax.xml.bind" module="jaxb-api" />
+      <exclude org="javax.ws.rs" module="jsr311-api" />
+      <exclude org="tomcat" module="jasper-runtime"/>
+      <exclude org="tomcat" module="jasper-compiler"/>
+      <exclude org="com.sun.jersey" module="jersey-core"/>
+      <exclude org="com.sun.jersey" module="jersey-server"/>
+      <exclude org="com.sun.jersey" module="jersey-json"/>
+      <exclude org="asm" module="asm"/>
+    </dependency>
+
+    <dependency org="org.apache.hbase" name="hbase-protocol" rev="${hbase95.version}" conf="hbase95->master">
+      <artifact name="hbase-protocol" type="jar"/>
+      <artifact name="hbase-protocol" type="test-jar" ext="jar" m:classifier="tests"/>
+      <exclude org="com.google.protobuf" module="protobuf-java"/>
+    </dependency>
+
+    <dependency org="org.apache.hbase" name="hbase-hadoop-compat" rev="${hbase95.version}" conf="hbase95->master">
+      <artifact name="hbase-hadoop-compat" type="jar"/>
+      <artifact name="hbase-hadoop-compat" type="test-jar" ext="jar" m:classifier="tests"/>
+    </dependency>
+
+    <dependency org="org.apache.hbase" name="hbase-hadoop1-compat" rev="${hbase95.version}" conf="hbase95->master">
+      <artifact name="hbase-hadoop1-compat" type="jar"/>
+      <artifact name="hbase-hadoop1-compat" type="test-jar" ext="jar" m:classifier="tests"/>
+      <exclude org="org.apache.hadoop" module="hadoop-core"/>
+      <exclude org="org.slf4j" module="slf4j-api"/>
+      <exclude org="stax" module="stax-api" />
+      <exclude org="javax.xml.bind" module="jaxb-api" />
+      <exclude org="tomcat" module="jasper-runtime"/>
+      <exclude org="tomcat" module="jasper-compiler"/>
+      <exclude org="com.sun.jersey" module="jersey-core"/>
+      <exclude org="com.sun.jersey" module="jersey-server"/>
+      <exclude org="com.sun.jersey" module="jersey-json"/>
+      <exclude org="asm" module="asm"/>
+    </dependency>
+
+    <dependency org="org.cloudera.htrace" name="htrace-core" rev="2.00" conf="hbase95->master">
+      <artifact name="htrace-core" type="jar"/>
+    </dependency>
+
     <!-- for TestHBaseStorage -->
     <dependency org="com.github.stephenc.high-scale-lib" name="high-scale-lib" rev="${high-scale-lib.version}"
        conf="test->default"/>

Modified: pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1522691&r1=1522690&r2=1522691&view=diff
==============================================================================
--- pig/trunk/ivy/libraries.properties (original)
+++ pig/trunk/ivy/libraries.properties Thu Sep 12 18:32:15 2013
@@ -41,7 +41,8 @@ hadoop-test.version=1.0.0
 hadoop-common.version=2.0.3-alpha
 hadoop-hdfs.version=2.0.3-alpha
 hadoop-mapreduce.version=2.0.3-alpha
-hbase.version=0.94.1
+hbase94.version=0.94.1
+hbase95.version=0.95.3-hadoop1-SNAPSHOT
 hsqldb.version=1.8.0.10
 hive.version=0.8.0
 httpcomponents.version=4.1

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java?rev=1522691&r1=1522690&r2=1522691&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java Thu Sep 12 18:32:15 2013
@@ -16,10 +16,8 @@
  */
 package org.apache.pig.backend.hadoop.hbase;
 
-import java.io.ByteArrayOutputStream;
 import java.io.DataInput;
 import java.io.DataOutput;
-import java.io.DataOutputStream;
 import java.io.IOException;
 import java.math.BigDecimal;
 import java.math.BigInteger;
@@ -62,7 +60,6 @@ import org.apache.hadoop.hbase.mapreduce
 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
 import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
 import org.apache.hadoop.hbase.mapreduce.TableSplit;
-import org.apache.hadoop.hbase.util.Base64;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;
@@ -81,7 +78,6 @@ import org.apache.pig.OrderedLoadFunc;
 import org.apache.pig.ResourceSchema;
 import org.apache.pig.ResourceSchema.ResourceFieldSchema;
 import org.apache.pig.StoreFuncInterface;
-import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
 import org.apache.pig.backend.hadoop.hbase.HBaseTableInputFormat.HBaseTableIFBuilder;
 import org.apache.pig.builtin.Utf8StorageConverter;
@@ -302,7 +298,7 @@ public class HBaseStorage extends LoadFu
         if (configuredOptions_.hasOption("minTimestamp")){
             minTimestamp_ = Long.parseLong(configuredOptions_.getOptionValue("minTimestamp"));
         } else {
-            minTimestamp_ = Long.MIN_VALUE;
+            minTimestamp_ = 0;
         }
 
         if (configuredOptions_.hasOption("maxTimestamp")){
@@ -677,6 +673,7 @@ public class HBaseStorage extends LoadFu
         .withLte(lte_)
         .withConf(m_conf)
         .build();
+        inputFormat.setScan(scan);
         return inputFormat;
     }
 
@@ -722,7 +719,6 @@ public class HBaseStorage extends LoadFu
                     new String[] {contextSignature});
             p.setProperty(contextSignature + "_projectedFields", ObjectSerializer.serialize(requiredFieldList));
         }
-        m_conf.set(TableInputFormat.SCAN, convertScanToString(scan));
     }
 
     private void initialiseHBaseClassLoaderResources(Job job) throws IOException {
@@ -807,19 +803,6 @@ public class HBaseStorage extends LoadFu
         return location;
     }
 
-    private static String convertScanToString(Scan scan) {
-        try {
-            ByteArrayOutputStream out = new ByteArrayOutputStream();
-            DataOutputStream dos = new DataOutputStream(out);
-            scan.write(dos);
-            return Base64.encodeBytes(out.toByteArray());
-        } catch (IOException e) {
-            LOG.error(e);
-            return "";
-        }
-
-    }
-
     /**
      * Set up the caster to use for reading values out of, and writing to, HBase.
      */

Modified: pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java?rev=1522691&r1=1522690&r2=1522691&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java Thu Sep 12 18:32:15 2013
@@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.util.Byte
 import org.apache.pig.ExecType;
 import org.apache.pig.PigServer;
 import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
 import org.apache.pig.backend.hadoop.hbase.HBaseStorage;
 import org.apache.pig.data.DataByteArray;
 import org.apache.pig.data.Tuple;
@@ -60,8 +59,6 @@ public class TestHBaseStorage {
     private static MiniCluster cluster;
     private static PigServer pig;
 
-    final static int NUM_REGIONSERVERS = 1;
-
     enum DataFormat {
         HBaseBinary, UTF8PlainText,
     }