Posted to commits@pig.apache.org by ga...@apache.org on 2009/11/05 18:55:55 UTC

svn commit: r833102 - in /hadoop/pig/trunk: ./ lib/ src/org/apache/pig/backend/hadoop/hbase/ test/org/apache/pig/test/

Author: gates
Date: Thu Nov  5 17:55:54 2009
New Revision: 833102

URL: http://svn.apache.org/viewvc?rev=833102&view=rev
Log:
PIG-970:  Changes to make HBase loader work with HBase 0.20


Added:
    hadoop/pig/trunk/lib/hbase-0.20.0-test.jar   (with props)
    hadoop/pig/trunk/lib/hbase-0.20.0.jar   (with props)
    hadoop/pig/trunk/lib/zookeeper-hbase-1329.jar   (with props)
Removed:
    hadoop/pig/trunk/lib/hbase-0.18.1-test.jar
    hadoop/pig/trunk/lib/hbase-0.18.1.jar
Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/build.xml
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseSlice.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=833102&r1=833101&r2=833102&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu Nov  5 17:55:54 2009
@@ -113,6 +113,9 @@
 
 BUG FIXES
 
+PIG-970:  Changes to make HBase loader work with HBase 0.20 (vbarat and zjffdu
+	      via gates)
+
 PIG-1035: support for skewed outer join (sriranjan via pradeepkth)
 
 PIG-1030: explain and dump not working with two UDFs inside inner plan of

Modified: hadoop/pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=833102&r1=833101&r2=833102&view=diff
==============================================================================
--- hadoop/pig/trunk/build.xml (original)
+++ hadoop/pig/trunk/build.xml Thu Nov  5 17:55:54 2009
@@ -48,9 +48,10 @@
     <property name="build.encoding" value="UTF8" />
     <!-- TODO with only one version of hadoop in the lib folder we do not need that anymore -->
     <property name="hadoop.jarfile" value="hadoop20.jar" />
-    <property name="hbase.jarfile" value="hbase-0.18.1.jar" />
-    <property name="hbase.test.jarfile" value="hbase-0.18.1-test.jar" />
-
+    <property name="hbase.jarfile" value="hbase-0.20.0.jar" />
+    <property name="hbase.test.jarfile" value="hbase-0.20.0-test.jar" />
+	<property name="zookeeper.jarfile" value="zookeeper-hbase-1329.jar" />
+	
     <!-- javac properties -->
     <property name="javac.debug" value="on" />
     <property name="javac.optimize" value="on" />
@@ -166,6 +167,7 @@
         <fileset file="${lib.dir}/${hadoop.jarfile}" />
         <fileset file="${lib.dir}/${hbase.jarfile}" />
         <fileset file="${lib.dir}/${hbase.test.jarfile}" />
+    	<fileset file="${lib.dir}/${zookeeper.jarfile}"/>
    <!-- <fileset file="${lib.dir}/commons-collections-3.2.jar" />  -->
     </path>
 
@@ -285,7 +287,8 @@
             <param name="dist" value="${test.build.classes}" />
             <param name="cp" value="test.classpath" />
         </antcall>
-
+    	
+    	<copy file="${basedir}/test/hbase-site.xml" tofile="${test.build.classes}/hbase-site.xml"/>
     </target>
 
     <!-- This target is for default compilation -->
@@ -502,6 +505,7 @@
                 <pathelement location="${output.jarfile.withouthadoop}" />
                 <pathelement location="${test.build.classes}" />
                 <pathelement location="${junit.hadoop.conf}" />
+            	
 		<pathelement path="${clover.jar}"/>
                 <path refid="classpath"/>
             </classpath>
@@ -524,7 +528,7 @@
                     <!-- Excluded under Windows.-->
                     <exclude name="**/TestHBaseStorage.java" if="isWindows" />
                     <!-- Excluced because we don't want to run them -->
-                    <exclude name="**/TestHBaseStorage.java" />
+                    <!-- <exclude name="**/TestHBaseStorage.java" /> -->
                     <exclude name="**/PigExecTestCase.java" />
                     <exclude name="**/TypeCheckingTestUtil.java" />
                     <exclude name="**/TypeGraphPrinter.java" />

Added: hadoop/pig/trunk/lib/hbase-0.20.0-test.jar
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/lib/hbase-0.20.0-test.jar?rev=833102&view=auto
==============================================================================
Binary file - no diff available.

Propchange: hadoop/pig/trunk/lib/hbase-0.20.0-test.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: hadoop/pig/trunk/lib/hbase-0.20.0.jar
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/lib/hbase-0.20.0.jar?rev=833102&view=auto
==============================================================================
Binary file - no diff available.

Propchange: hadoop/pig/trunk/lib/hbase-0.20.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: hadoop/pig/trunk/lib/zookeeper-hbase-1329.jar
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/lib/zookeeper-hbase-1329.jar?rev=833102&view=auto
==============================================================================
Binary file - no diff available.

Propchange: hadoop/pig/trunk/lib/zookeeper-hbase-1329.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseSlice.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseSlice.java?rev=833102&r1=833101&r2=833102&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseSlice.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/hbase/HBaseSlice.java Thu Nov  5 17:55:54 2009
@@ -24,9 +24,9 @@
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.UnknownScannerException;
 import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.client.Scanner;
-import org.apache.hadoop.hbase.io.Cell;
-import org.apache.hadoop.hbase.io.RowResult;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.pig.Slice;
@@ -61,7 +61,7 @@
     /** The connection to the table in Hbase **/
     private transient HTable m_table;
     /** The scanner over the table **/
-    private transient Scanner m_scanner;
+    private transient ResultScanner m_scanner;
 
     private transient ArrayList<Object> mProtoTuple;
 
@@ -153,7 +153,8 @@
     @Override
     public void init(DataStorage store) throws IOException {
         LOG.info("Init Hbase Slice " + this);
-        HBaseConfiguration conf = new HBaseConfiguration();
+        
+        HBaseConfiguration conf=new HBaseConfiguration();
         // connect to the given table
         m_table = new HTable(conf, m_tableName);
         // init the scanner
@@ -178,17 +179,18 @@
      * @throws IOException
      */
     private void restart(byte[] startRow) throws IOException {
+	Scan scan;
         if ((m_endRow != null) && (m_endRow.length > 0)) {
-            this.m_scanner = this.m_table.getScanner(m_inputColumns, startRow,
-                    m_endRow);
+	    scan = new Scan(startRow, m_endRow);
         } else {
-            this.m_scanner = this.m_table.getScanner(m_inputColumns, startRow);
+	    scan = new Scan(startRow);
         }
+	this.m_scanner = this.m_table.getScanner(scan);
     }
 
     @Override
     public boolean next(Tuple value) throws IOException {
-        RowResult result;
+        Result result;
         try {
             result = this.m_scanner.next();
         } catch (UnknownScannerException e) {
@@ -215,15 +217,14 @@
      * @param tuple
      *            tuple
      */
-    private void convertResultToTuple(RowResult result, Tuple tuple) {
+    private void convertResultToTuple(Result result, Tuple tuple) {
         if (mProtoTuple == null)
             mProtoTuple = new ArrayList<Object>();
 
-        Cell cell = null;
         byte[] value = null;
         for (byte[] column : m_inputColumns) {
-            cell = result.get(column);
-            if (cell == null || (value = cell.getValue()) == null) {
+            value = result.getValue(column);
+            if (value == null) {
                 mProtoTuple.add(null);
             } else {
                 mProtoTuple.add(new DataByteArray(value));

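The HBaseSlice changes above track the client API rename between HBase 0.18 and 0.20: Scanner becomes ResultScanner, the getScanner(columns, startRow[, endRow]) overloads are replaced by a single getScanner(Scan) that takes a Scan object describing the row range, and RowResult/Cell collapse into Result, whose getValue(column) returns the cell bytes directly. A minimal sketch of the 0.20-era pattern the new code follows (the table name, column name, and row keys here are placeholders, not from the commit):

    import java.io.IOException;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.ResultScanner;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.util.Bytes;

    public class ScanSketch {
        public static void main(String[] args) throws IOException {
            HBaseConfiguration conf = new HBaseConfiguration();
            HTable table = new HTable(conf, "mytable");
            // 0.20 style: describe the scan first, then ask the table for
            // a ResultScanner (in 0.18 the range went into getScanner itself).
            Scan scan = new Scan(Bytes.toBytes("row-000"), Bytes.toBytes("row-999"));
            ResultScanner scanner = table.getScanner(scan);
            try {
                for (Result r = scanner.next(); r != null; r = scanner.next()) {
                    // Result.getValue replaces RowResult.get(column).getValue().
                    byte[] value = r.getValue(Bytes.toBytes("cf:qual"));
                    System.out.println(value == null ? "null" : Bytes.toString(value));
                }
            } finally {
                scanner.close();
            }
        }
    }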
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java?rev=833102&r1=833101&r2=833102&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestHBaseStorage.java Thu Nov  5 17:55:54 2009
@@ -16,19 +16,27 @@
  */
 package org.apache.pig.test;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Iterator;
 
+import junit.framework.TestCase;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.MiniZooKeeperCluster;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.BatchUpdate;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.pig.ExecType;
@@ -37,11 +45,10 @@
 import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
 import org.apache.pig.data.DataByteArray;
 import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.io.FileLocalizer;
 import org.junit.Before;
 import org.junit.Test;
 
-import junit.framework.TestCase;
-
 /** {@link org.apache.pig.backend.hadoop.hbase.HBaseStorage} Test Case **/
 public class TestHBaseStorage extends TestCase {
 
@@ -51,6 +58,7 @@
     private MiniCluster cluster = MiniCluster.buildCluster();
     private HBaseConfiguration conf;
     private MiniHBaseCluster hbaseCluster;
+    private MiniZooKeeperCluster zooKeeperCluster;
     
     private PigServer pig;
     
@@ -70,8 +78,23 @@
     @Override
     protected void setUp() throws Exception {
         super.setUp();
+        
         conf = new HBaseConfiguration(ConfigurationUtil.
              toConfiguration(cluster.getProperties()));
+        conf.set("fs.default.name", cluster.getFileSystem().getUri().toString());
+        Path parentdir = cluster.getFileSystem().getHomeDirectory();
+        conf.set(HConstants.HBASE_DIR, parentdir.toString());
+        
+        // Make the thread wake frequency a little slower so other threads
+        // can run
+        conf.setInt("hbase.server.thread.wakefrequency", 2000);
+        
+        // Make lease timeout longer, lease checks less frequent
+        conf.setInt("hbase.master.lease.period", 10 * 1000);
+        
+        // Increase the amount of time between client retries
+        conf.setLong("hbase.client.pause", 15 * 1000);
+        
         try {
             hBaseClusterSetup();
         } catch (Exception e) {
@@ -81,17 +104,28 @@
             throw e;
         }
         
-        pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+        pig = new PigServer(ExecType.MAPREDUCE, ConfigurationUtil.toProperties(conf));
     }
     
     /**
      * Actually start the MiniHBase instance.
      */
     protected void hBaseClusterSetup() throws Exception {
+        zooKeeperCluster = new MiniZooKeeperCluster();
+        int clientPort = this.zooKeeperCluster.startup(new File("build/test"));
+        conf.set("hbase.zookeeper.property.clientPort",clientPort+"");
       // start the mini cluster
       hbaseCluster = new MiniHBaseCluster(conf, NUM_REGIONSERVERS);
       // opening the META table ensures that cluster is running
-      new HTable(conf, HConstants.META_TABLE_NAME);
+      while(true){
+    	  try{
+    		  new HTable(conf, HConstants.META_TABLE_NAME);
+    		  break;
+    	  }catch(IOException e){
+    		  Thread.sleep(1000);
+    	  }
+    	  
+      }
     }
 
     @Override
@@ -108,6 +142,13 @@
                     LOG.warn("Closing mini hbase cluster", e);
                 }
             }
+            if (zooKeeperCluster!=null){
+            	try{
+            		zooKeeperCluster.shutdown();
+            	} catch (IOException e){
+            		LOG.warn("Closing zookeeper cluster",e);
+            	}
+            }
         } catch (Exception e) {
             LOG.error(e);
         }
@@ -122,6 +163,7 @@
     @Test
     public void testLoadFromHBase() throws IOException, ExecException {
         prepareTable();
+
         pig.registerQuery("a = load 'hbase://" + TESTTABLE + "' using " +
             "org.apache.pig.backend.hadoop.hbase.HBaseStorage('" + TESTCOLUMN_A + 
             " " + TESTCOLUMN_B + " " + TESTCOLUMN_C + "') as (col_a, col_b:int, col_c);");
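
The test changes bring the harness in line with 0.20's cluster architecture: a MiniZooKeeperCluster must be started, and its client port wired into the configuration, before the MiniHBaseCluster, since HBase 0.20 discovers the master and root region through ZooKeeper; and the old single open of the META table is replaced by a polling loop because the cluster may not be serving yet when the constructor returns. The committed loop retries forever; a bounded variant of the same wait, with a hypothetical helper name and retry count chosen for illustration, could look like:

    import java.io.IOException;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.client.HTable;

    public class MetaWait {
        /**
         * Block until the META table can be opened, i.e. the mini cluster
         * is answering requests, giving up after maxAttempts rather than
         * spinning indefinitely.
         */
        static void waitForMeta(HBaseConfiguration conf, int maxAttempts)
                throws IOException, InterruptedException {
            for (int attempt = 1; attempt <= maxAttempts; attempt++) {
                try {
                    new HTable(conf, HConstants.META_TABLE_NAME);
                    return; // cluster is up
                } catch (IOException e) {
                    if (attempt == maxAttempts) {
                        throw e; // out of retries; surface the failure
                    }
                    Thread.sleep(1000); // region not assigned yet; back off
                }
            }
        }
    }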