You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/09/30 15:16:44 UTC
svn commit: r1003056 - in /hadoop/hive/trunk: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
ql/src/test/queries/clientpositive/mapjoin1.q
ql/src/test/results/clientpositive/mapjoin1.q.out
Author: namit
Date: Thu Sep 30 13:16:44 2010
New Revision: 1003056
URL: http://svn.apache.org/viewvc?rev=1003056&view=rev
Log:
HIVE-1670 MapJoin throws an error if no column from the mapjoined table is selected
(Ning Zhang via namit)
Added:
hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin1.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1003056&r1=1003055&r2=1003056&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Sep 30 13:16:44 2010
@@ -309,6 +309,9 @@ Trunk - Unreleased
HIVE-1671 multithreading on Context.pathToCS
(Bennie Schut via namit)
+ HIVE-1670 MapJoin throws an error if no column from the mapjoined table is selected
+ (Ning Zhang via namit)
+
TESTS
HIVE-1464. improve test query performance
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java?rev=1003056&r1=1003055&r2=1003056&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java Thu Sep 30 13:16:44 2010
@@ -24,6 +24,8 @@ import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.ArrayList;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -33,7 +35,6 @@ import org.apache.hadoop.hive.serde2.Ser
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.io.Writable;
-
/**
* Map Join Object used for both key and value.
*/
@@ -43,6 +44,7 @@ public class MapJoinObjectValue implemen
protected transient RowContainer obj;
protected transient Configuration conf;
protected int bucketSize; // bucket size for RowContainer
+ protected Log LOG = LogFactory.getLog(this.getClass().getName());
public MapJoinObjectValue() {
bucketSize = 100; // default bucket size
@@ -71,7 +73,6 @@ public class MapJoinObjectValue implemen
}
}
}
-
return false;
}
@@ -90,19 +91,25 @@ public class MapJoinObjectValue implemen
MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(
Integer.valueOf(metadataTag));
int sz = in.readInt();
+
RowContainer res = new RowContainer(bucketSize, ctx.getConf());
res.setSerDe(ctx.getSerDe(), ctx.getStandardOI());
res.setTableDesc(ctx.getTblDesc());
- for (int pos = 0; pos < sz; pos++) {
- Writable val = ctx.getSerDe().getSerializedClass().newInstance();
- val.readFields(in);
-
- ArrayList<Object> memObj = (ArrayList<Object>) ObjectInspectorUtils
- .copyToStandardObject(ctx.getSerDe().deserialize(val), ctx
- .getSerDe().getObjectInspector(),
- ObjectInspectorCopyOption.WRITABLE);
+ if (sz > 0) {
+ int numCols = in.readInt();
+ if (numCols > 0) {
+ for (int pos = 0; pos < sz; pos++) {
+ Writable val = ctx.getSerDe().getSerializedClass().newInstance();
+ val.readFields(in);
+
+ ArrayList<Object> memObj = (ArrayList<Object>) ObjectInspectorUtils
+ .copyToStandardObject(ctx.getSerDe().deserialize(val), ctx
+ .getSerDe().getObjectInspector(),
+ ObjectInspectorCopyOption.WRITABLE);
- res.add(memObj);
+ res.add(memObj);
+ }
+ }
}
obj = res;
} catch (Exception e) {
@@ -123,10 +130,16 @@ public class MapJoinObjectValue implemen
// Different processing for key and value
RowContainer<ArrayList<Object>> v = obj;
out.writeInt(v.size());
-
- for (ArrayList<Object> row = v.first(); row != null; row = v.next()) {
- Writable outVal = ctx.getSerDe().serialize(row, ctx.getStandardOI());
- outVal.write(out);
+ if (v.size() > 0) {
+ ArrayList<Object> row = v.first();
+ out.writeInt(row.size());
+
+ if (row.size() > 0) {
+ for (; row != null; row = v.next()) {
+ Writable outVal = ctx.getSerDe().serialize(row, ctx.getStandardOI());
+ outVal.write(out);
+ }
+ }
}
} catch (SerDeException e) {
throw new IOException(e);
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin1.q?rev=1003056&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin1.q Thu Sep 30 13:16:44 2010
@@ -0,0 +1,5 @@
+set hive.mapjoin.cache.numrows=100;
+
+SELECT /*+ MAPJOIN(b) */ sum(a.key) as sum_a
+ FROM srcpart a
+ JOIN src b ON a.key = b.key where a.ds is not null;
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out?rev=1003056&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out Thu Sep 30 13:16:44 2010
@@ -0,0 +1,21 @@
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ sum(a.key) as sum_a
+ FROM srcpart a
+ JOIN src b ON a.key = b.key where a.ds is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-24_21-14-16_226_3903968223903966652/-mr-10000
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ sum(a.key) as sum_a
+ FROM srcpart a
+ JOIN src b ON a.key = b.key where a.ds is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-24_21-14-16_226_3903968223903966652/-mr-10000
+76260.0