You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/06/08 08:34:16 UTC
svn commit: r782541 - in /hadoop/hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java
java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java
test/queries/clientpositive/join36.q test/results/clientpositive/join36.q.out
Author: zshao
Date: Mon Jun 8 06:34:15 2009
New Revision: 782541
URL: http://svn.apache.org/viewvc?rev=782541&view=rev
Log:
HIVE-528. Map Join followup: split MapJoinObject into MapJoinObjectKey and MapJoinObjectValue. (Namit Jain via zshao)
Added:
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java
hadoop/hive/trunk/ql/src/test/queries/clientpositive/join36.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java?rev=782541&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java Mon Jun 8 06:34:15 2009
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator.MapJoinObjectCtx;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Map Join Object used for the key: a list of objects plus the metadata tag
+ * that selects its MapJoinObjectCtx in MapJoinOperator.
+ */
+public class MapJoinObjectKey implements Externalizable {
+
+  protected transient int metadataTag;
+  protected transient ArrayList<Object> obj;
+  // Scratch Writable that readExternal fills before deserializing it into obj.
+  transient Writable val;
+
+  /** No-arg constructor; Externalizable needs one to rebuild instances. */
+  public MapJoinObjectKey() {
+    val = new BytesWritable();
+  }
+
+  /**
+   * @param metadataTag tag used to look up the MapJoinObjectCtx
+   * @param obj the objects that make up the key
+   */
+  public MapJoinObjectKey(int metadataTag, ArrayList<Object> obj) {
+    this.metadataTag = metadataTag;
+    this.obj = obj;
+    val = new BytesWritable();
+  }
+
+  /** Two keys are equal when their metadata tags and their obj lists match. */
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof MapJoinObjectKey)) {
+      return false;
+    }
+    MapJoinObjectKey other = (MapJoinObjectKey) o;
+    if (other.getMetadataTag() != metadataTag) {
+      return false;
+    }
+    ArrayList<Object> otherObj = other.getObj();
+    return (obj == null) ? (otherObj == null) : (otherObj != null && otherObj.equals(obj));
+  }
+
+  /** Hashes obj only; equal keys have equal obj, so the equals contract holds. */
+  @Override
+  public int hashCode() {
+    return (obj == null) ? 0 : obj.hashCode();
+  }
+
+  /**
+   * Reads the metadata tag, then the serialized key, and deserializes it into obj.
+   * @throws IOException on any failure; the original exception is kept as the cause
+   */
+  @Override
+  public void readExternal(ObjectInput in) throws IOException,
+      ClassNotFoundException {
+    try {
+      metadataTag = in.readInt();
+      // look up the MapJoinObjectCtx stored for this tag in the mapjoin operator
+      MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag));
+      val.readFields(in);
+      obj = (ArrayList<Object>) ctx.getDeserializer().deserialize(val);
+    } catch (Exception e) {
+      // chain the cause so the real failure and its stack trace are not lost
+      throw new IOException(e.getMessage(), e);
+    }
+  }
+
+  /**
+   * Writes the metadata tag followed by obj serialized via the context's serializer.
+   * @throws IOException on any failure; the original exception is kept as the cause
+   */
+  @Override
+  public void writeExternal(ObjectOutput out) throws IOException {
+    try {
+      out.writeInt(metadataTag);
+      // look up the MapJoinObjectCtx stored for this tag in the mapjoin operator
+      MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag));
+      Writable outVal = ctx.getSerializer().serialize(obj, ctx.getSerObjInspector());
+      outVal.write(out);
+    } catch (Exception e) {
+      throw new IOException(e.getMessage(), e);
+    }
+  }
+
+  /** @return the metadataTag */
+  public int getMetadataTag() {
+    return metadataTag;
+  }
+
+  /** @param metadataTag the metadataTag to set */
+  public void setMetadataTag(int metadataTag) {
+    this.metadataTag = metadataTag;
+  }
+
+  /** @return the obj */
+  public ArrayList<Object> getObj() {
+    return obj;
+  }
+
+  /** @param obj the obj to set */
+  public void setObj(ArrayList<Object> obj) {
+    this.obj = obj;
+  }
+
+}
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java?rev=782541&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java Mon Jun 8 06:34:15 2009
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Vector;
+
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator.MapJoinObjectCtx;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * Map Join Object used for both key and value
+ */
+public class MapJoinObjectValue implements Externalizable {
+
+ transient protected int metadataTag;
+ transient protected Vector<ArrayList<Object>> obj;
+ transient Writable val;
+
+
+ public MapJoinObjectValue() {
+ val = new Text();
+ }
+
+ /**
+ * @param metadataTag
+ * @param objectTypeTag
+ * @param obj
+ */
+ public MapJoinObjectValue(int metadataTag, Vector<ArrayList<Object>> obj) {
+ val = new Text();
+ this.metadataTag = metadataTag;
+ this.obj = obj;
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof MapJoinObjectValue) {
+ MapJoinObjectValue mObj = (MapJoinObjectValue)o;
+ if (mObj.getMetadataTag() == metadataTag) {
+ if ((obj == null) && (mObj.getObj() == null))
+ return true;
+ if ((obj != null) && (mObj.getObj() != null) && (mObj.getObj().equals(obj)))
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ public int hashCode() {
+ return (obj == null) ? 0 : obj.hashCode();
+ }
+
+ @Override
+ public void readExternal(ObjectInput in) throws IOException,
+ ClassNotFoundException {
+ try {
+ metadataTag = in.readInt();
+
+ // get the tableDesc from the map stored in the mapjoin operator
+ MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag));
+ int sz = in.readInt();
+
+ Vector<ArrayList<Object>> res = new Vector<ArrayList<Object>>();
+ for (int pos = 0; pos < sz; pos++) {
+ ArrayList<Object> memObj = new ArrayList<Object>();
+ val.readFields(in);
+ StructObjectInspector objIns = (StructObjectInspector) ctx
+ .getDeserObjInspector();
+ LazyStruct lazyObj = (LazyStruct) (((LazyObject) ctx.getDeserializer()
+ .deserialize(val)).getObject());
+ List<? extends StructField> listFields = objIns.getAllStructFieldRefs();
+ for (StructField fld : listFields) {
+ memObj.add(objIns.getStructFieldData(lazyObj, fld));
+ }
+
+ res.add(memObj);
+ }
+ obj = res;
+ } catch (Exception e) {
+ throw new IOException(e.getMessage());
+ }
+ }
+
+ @Override
+ public void writeExternal(ObjectOutput out) throws IOException {
+ try {
+
+ out.writeInt(metadataTag);
+
+ // get the tableDesc from the map stored in the mapjoin operator
+ MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag));
+
+ // Different processing for key and value
+ Vector<ArrayList<Object>> v = (Vector<ArrayList<Object>>) obj;
+ out.writeInt(v.size());
+
+ for (int pos = 0; pos < v.size(); pos++) {
+ Writable outVal = ctx.getSerializer().serialize(v.get(pos), ctx.getSerObjInspector());
+ outVal.write(out);
+ }
+ }
+ catch (Exception e) {
+ throw new IOException(e.getMessage());
+ }
+ }
+
+ /**
+ * @return the metadataTag
+ */
+ public int getMetadataTag() {
+ return metadataTag;
+ }
+
+ /**
+ * @param metadataTag the metadataTag to set
+ */
+ public void setMetadataTag(int metadataTag) {
+ this.metadataTag = metadataTag;
+ }
+
+ /**
+ * @return the obj
+ */
+ public Vector<ArrayList<Object>> getObj() {
+ return obj;
+ }
+
+ /**
+ * @param obj the obj to set
+ */
+ public void setObj(Vector<ArrayList<Object>> obj) {
+ this.obj = obj;
+ }
+
+}
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/join36.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/join36.q?rev=782541&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/join36.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/join36.q Mon Jun 8 06:34:15 2009
@@ -0,0 +1,29 @@
+set hive.mapjoin.numrows = 2;
+
+drop table dest_j1;
+drop table tmp1;
+drop table tmp2;
+
+CREATE TABLE tmp1(key INT, cnt INT);
+CREATE TABLE tmp2(key INT, cnt INT);
+CREATE TABLE dest_j1(key INT, value INT, val2 INT);
+
+INSERT OVERWRITE TABLE tmp1
+SELECT key, count(1) from src group by key;
+
+INSERT OVERWRITE TABLE tmp2
+SELECT key, count(1) from src group by key;
+
+EXPLAIN
+INSERT OVERWRITE TABLE dest_j1
+SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt
+FROM tmp1 x JOIN tmp2 y ON (x.key = y.key);
+
+INSERT OVERWRITE TABLE dest_j1
+SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt
+FROM tmp1 x JOIN tmp2 y ON (x.key = y.key);
+
+select * from dest_j1 x order by x.key;
+
+drop table dest_j1;
+
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out?rev=782541&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out Mon Jun 8 06:34:15 2009
@@ -0,0 +1,424 @@
+query: drop table dest_j1
+query: drop table tmp1
+query: drop table tmp2
+query: CREATE TABLE tmp1(key INT, cnt INT)
+query: CREATE TABLE tmp2(key INT, cnt INT)
+query: CREATE TABLE dest_j1(key INT, value INT, val2 INT)
+query: INSERT OVERWRITE TABLE tmp1
+SELECT key, count(1) from src group by key
+Input: default/src
+Output: default/tmp1
+query: INSERT OVERWRITE TABLE tmp2
+SELECT key, count(1) from src group by key
+Input: default/src
+Output: default/tmp2
+query: EXPLAIN
+INSERT OVERWRITE TABLE dest_j1
+SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt
+FROM tmp1 x JOIN tmp2 y ON (x.key = y.key)
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF tmp1 x) (TOK_TABREF tmp2 y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) cnt)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ y
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {cnt}
+ 1 {key} {cnt}
+ keys:
+ 0
+ 1
+ Position of Big Table: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ x
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ x
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {cnt}
+ 1 {key} {cnt}
+ keys:
+ 0
+ 1
+ Position of Big Table: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/data/users/njain/hive1/hive1/build/ql/tmp/1050118775/10002
+ Select Operator
+ Select Operator
+ expressions:
+ expr: 0
+ type: int
+ expr: 1
+ type: int
+ expr: 3
+ type: int
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest_j1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest_j1
+
+
+query: INSERT OVERWRITE TABLE dest_j1
+SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt
+FROM tmp1 x JOIN tmp2 y ON (x.key = y.key)
+Input: default/tmp2
+Input: default/tmp1
+Output: default/dest_j1
+query: select * from dest_j1 x order by x.key
+Input: default/dest_j1
+Output: file:/data/users/njain/hive1/hive1/build/ql/tmp/912828284/10000
+0 3 3
+2 1 1
+4 1 1
+5 3 3
+8 1 1
+9 1 1
+10 1 1
+11 1 1
+12 2 2
+15 2 2
+17 1 1
+18 2 2
+19 1 1
+20 1 1
+24 2 2
+26 2 2
+27 1 1
+28 1 1
+30 1 1
+33 1 1
+34 1 1
+35 3 3
+37 2 2
+41 1 1
+42 2 2
+43 1 1
+44 1 1
+47 1 1
+51 2 2
+53 1 1
+54 1 1
+57 1 1
+58 2 2
+64 1 1
+65 1 1
+66 1 1
+67 2 2
+69 1 1
+70 3 3
+72 2 2
+74 1 1
+76 2 2
+77 1 1
+78 1 1
+80 1 1
+82 1 1
+83 2 2
+84 2 2
+85 1 1
+86 1 1
+87 1 1
+90 3 3
+92 1 1
+95 2 2
+96 1 1
+97 2 2
+98 2 2
+100 2 2
+103 2 2
+104 2 2
+105 1 1
+111 1 1
+113 2 2
+114 1 1
+116 1 1
+118 2 2
+119 3 3
+120 2 2
+125 2 2
+126 1 1
+128 3 3
+129 2 2
+131 1 1
+133 1 1
+134 2 2
+136 1 1
+137 2 2
+138 4 4
+143 1 1
+145 1 1
+146 2 2
+149 2 2
+150 1 1
+152 2 2
+153 1 1
+155 1 1
+156 1 1
+157 1 1
+158 1 1
+160 1 1
+162 1 1
+163 1 1
+164 2 2
+165 2 2
+166 1 1
+167 3 3
+168 1 1
+169 4 4
+170 1 1
+172 2 2
+174 2 2
+175 2 2
+176 2 2
+177 1 1
+178 1 1
+179 2 2
+180 1 1
+181 1 1
+183 1 1
+186 1 1
+187 3 3
+189 1 1
+190 1 1
+191 2 2
+192 1 1
+193 3 3
+194 1 1
+195 2 2
+196 1 1
+197 2 2
+199 3 3
+200 2 2
+201 1 1
+202 1 1
+203 2 2
+205 2 2
+207 2 2
+208 3 3
+209 2 2
+213 2 2
+214 1 1
+216 2 2
+217 2 2
+218 1 1
+219 2 2
+221 2 2
+222 1 1
+223 2 2
+224 2 2
+226 1 1
+228 1 1
+229 2 2
+230 5 5
+233 2 2
+235 1 1
+237 2 2
+238 2 2
+239 2 2
+241 1 1
+242 2 2
+244 1 1
+247 1 1
+248 1 1
+249 1 1
+252 1 1
+255 2 2
+256 2 2
+257 1 1
+258 1 1
+260 1 1
+262 1 1
+263 1 1
+265 2 2
+266 1 1
+272 2 2
+273 3 3
+274 1 1
+275 1 1
+277 4 4
+278 2 2
+280 2 2
+281 2 2
+282 2 2
+283 1 1
+284 1 1
+285 1 1
+286 1 1
+287 1 1
+288 2 2
+289 1 1
+291 1 1
+292 1 1
+296 1 1
+298 3 3
+302 1 1
+305 1 1
+306 1 1
+307 2 2
+308 1 1
+309 2 2
+310 1 1
+311 3 3
+315 1 1
+316 3 3
+317 2 2
+318 3 3
+321 2 2
+322 2 2
+323 1 1
+325 2 2
+327 3 3
+331 2 2
+332 1 1
+333 2 2
+335 1 1
+336 1 1
+338 1 1
+339 1 1
+341 1 1
+342 2 2
+344 2 2
+345 1 1
+348 5 5
+351 1 1
+353 2 2
+356 1 1
+360 1 1
+362 1 1
+364 1 1
+365 1 1
+366 1 1
+367 2 2
+368 1 1
+369 3 3
+373 1 1
+374 1 1
+375 1 1
+377 1 1
+378 1 1
+379 1 1
+382 2 2
+384 3 3
+386 1 1
+389 1 1
+392 1 1
+393 1 1
+394 1 1
+395 2 2
+396 3 3
+397 2 2
+399 2 2
+400 1 1
+401 5 5
+402 1 1
+403 3 3
+404 2 2
+406 4 4
+407 1 1
+409 3 3
+411 1 1
+413 2 2
+414 2 2
+417 3 3
+418 1 1
+419 1 1
+421 1 1
+424 2 2
+427 1 1
+429 2 2
+430 3 3
+431 3 3
+432 1 1
+435 1 1
+436 1 1
+437 1 1
+438 3 3
+439 2 2
+443 1 1
+444 1 1
+446 1 1
+448 1 1
+449 1 1
+452 1 1
+453 1 1
+454 3 3
+455 1 1
+457 1 1
+458 2 2
+459 2 2
+460 1 1
+462 2 2
+463 2 2
+466 3 3
+467 1 1
+468 4 4
+469 5 5
+470 1 1
+472 1 1
+475 1 1
+477 1 1
+478 2 2
+479 1 1
+480 3 3
+481 1 1
+482 1 1
+483 1 1
+484 1 1
+485 1 1
+487 1 1
+489 4 4
+490 1 1
+491 1 1
+492 2 2
+493 1 1
+494 1 1
+495 1 1
+496 1 1
+497 1 1
+498 3 3
+query: drop table dest_j1