You are viewing a plain-text version of this content; the hyperlink to the canonical version was not preserved in this text copy.
Posted to commits@hive.apache.org by na...@apache.org on 2010/09/30 15:16:44 UTC

svn commit: r1003056 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java ql/src/test/queries/clientpositive/mapjoin1.q ql/src/test/results/clientpositive/mapjoin1.q.out

Author: namit
Date: Thu Sep 30 13:16:44 2010
New Revision: 1003056

URL: http://svn.apache.org/viewvc?rev=1003056&view=rev
Log:
HIVE-1670 MapJoin throws an error if no column from the mapjoined table is selected
(Ning Zhang via namit)


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin1.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1003056&r1=1003055&r2=1003056&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Sep 30 13:16:44 2010
@@ -309,6 +309,9 @@ Trunk -  Unreleased
     HIVE-1671 multithreading on Context.pathToCS
     (Bennie Schut via namit)
 
+    HIVE-1670 MapJoin throws an error if no column from the mapjoined table is selected
+    (Ning Zhang via namit)
+
   TESTS
 
     HIVE-1464. improve  test query performance

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java?rev=1003056&r1=1003055&r2=1003056&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java Thu Sep 30 13:16:44 2010
@@ -24,6 +24,8 @@ import java.io.ObjectInput;
 import java.io.ObjectOutput;
 import java.util.ArrayList;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -33,7 +35,6 @@ import org.apache.hadoop.hive.serde2.Ser
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.io.Writable;
-
 /**
  * Map Join Object used for both key and value.
  */
@@ -43,6 +44,7 @@ public class MapJoinObjectValue implemen
   protected transient RowContainer obj;
   protected transient Configuration conf;
   protected int bucketSize; // bucket size for RowContainer
+  protected Log LOG = LogFactory.getLog(this.getClass().getName());
 
   public MapJoinObjectValue() {
     bucketSize = 100; // default bucket size
@@ -71,7 +73,6 @@ public class MapJoinObjectValue implemen
         }
       }
     }
-
     return false;
   }
 
@@ -90,19 +91,25 @@ public class MapJoinObjectValue implemen
       MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(
           Integer.valueOf(metadataTag));
       int sz = in.readInt();
+
       RowContainer res = new RowContainer(bucketSize, ctx.getConf());
       res.setSerDe(ctx.getSerDe(), ctx.getStandardOI());
       res.setTableDesc(ctx.getTblDesc());
-      for (int pos = 0; pos < sz; pos++) {
-        Writable val = ctx.getSerDe().getSerializedClass().newInstance();
-        val.readFields(in);
-
-        ArrayList<Object> memObj = (ArrayList<Object>) ObjectInspectorUtils
-            .copyToStandardObject(ctx.getSerDe().deserialize(val), ctx
-            .getSerDe().getObjectInspector(),
-            ObjectInspectorCopyOption.WRITABLE);
+      if (sz > 0) {
+        int numCols = in.readInt();
+        if (numCols > 0) {
+          for (int pos = 0; pos < sz; pos++) {
+            Writable val = ctx.getSerDe().getSerializedClass().newInstance();
+            val.readFields(in);
+
+            ArrayList<Object> memObj = (ArrayList<Object>) ObjectInspectorUtils
+              .copyToStandardObject(ctx.getSerDe().deserialize(val), ctx
+              .getSerDe().getObjectInspector(),
+               ObjectInspectorCopyOption.WRITABLE);
 
-        res.add(memObj);
+            res.add(memObj);
+          }
+        }
       }
       obj = res;
     } catch (Exception e) {
@@ -123,10 +130,16 @@ public class MapJoinObjectValue implemen
       // Different processing for key and value
       RowContainer<ArrayList<Object>> v = obj;
       out.writeInt(v.size());
-
-      for (ArrayList<Object> row = v.first(); row != null; row = v.next()) {
-        Writable outVal = ctx.getSerDe().serialize(row, ctx.getStandardOI());
-        outVal.write(out);
+      if (v.size() > 0) {
+        ArrayList<Object> row = v.first();
+        out.writeInt(row.size());
+
+        if (row.size() > 0) {
+          for (; row != null; row = v.next()) {
+            Writable outVal = ctx.getSerDe().serialize(row, ctx.getStandardOI());
+            outVal.write(out);
+          }
+        }
       }
     } catch (SerDeException e) {
       throw new IOException(e);

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin1.q?rev=1003056&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin1.q Thu Sep 30 13:16:44 2010
@@ -0,0 +1,5 @@
+set hive.mapjoin.cache.numrows=100;
+
+SELECT  /*+ MAPJOIN(b) */ sum(a.key) as sum_a
+	FROM srcpart a
+	JOIN src b ON a.key = b.key where a.ds is not null;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out?rev=1003056&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out Thu Sep 30 13:16:44 2010
@@ -0,0 +1,21 @@
+PREHOOK: query: SELECT  /*+ MAPJOIN(b) */ sum(a.key) as sum_a
+	FROM srcpart a
+	JOIN src b ON a.key = b.key where a.ds is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-24_21-14-16_226_3903968223903966652/-mr-10000
+POSTHOOK: query: SELECT  /*+ MAPJOIN(b) */ sum(a.key) as sum_a
+	FROM srcpart a
+	JOIN src b ON a.key = b.key where a.ds is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-24_21-14-16_226_3903968223903966652/-mr-10000
+76260.0