You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/06/08 08:34:16 UTC

svn commit: r782541 - in /hadoop/hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java test/queries/clientpositive/join36.q test/results/clientpositive/join36.q.out

Author: zshao
Date: Mon Jun  8 06:34:15 2009
New Revision: 782541

URL: http://svn.apache.org/viewvc?rev=782541&view=rev
Log:
HIVE-528. Map Join followup: split MapJoinObject into MapJoinObjectKey and MapJoinObjectValue. (Namit Jain via zshao)

Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/join36.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java?rev=782541&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectKey.java Mon Jun  8 06:34:15 2009
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator.MapJoinObjectCtx;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Map Join Object used for the key
+ */
+public class MapJoinObjectKey implements Externalizable {
+
+  transient protected int     metadataTag;
+  transient protected ArrayList<Object>  obj;
+  transient Writable val;
+    
+  public MapJoinObjectKey() {
+    val = new BytesWritable();
+  }
+
+  /**
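+   * Creates a key object for the given metadata tag.
+   * @param metadataTag tag used to look up the MapJoinObjectCtx in MapJoinOperator.getMapMetadata()
+   * @param obj the key values held by this object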
+   */
+  public MapJoinObjectKey(int metadataTag, ArrayList<Object> obj) {
+    this.metadataTag = metadataTag;
+    this.obj = obj;
+    val = new BytesWritable();
+  }
+  
+  public boolean equals(Object o) {
+    if (o instanceof MapJoinObjectKey) {
+      MapJoinObjectKey mObj = (MapJoinObjectKey)o;
+      if (mObj.getMetadataTag() == metadataTag) {
+        if ((obj == null) && (mObj.getObj() == null))
+          return true;
+        if ((obj != null) && (mObj.getObj() != null) && (mObj.getObj().equals(obj)))
+          return true;
+      }
+    }
+
+    return false;
+  }
+  
+  public int hashCode() {
+    return (obj == null) ? 0 : obj.hashCode();
+  }
+  
+  @Override
+  public void readExternal(ObjectInput in) throws IOException,
+      ClassNotFoundException {
+    try {
+      metadataTag = in.readInt();
+
+      // get the MapJoinObjectCtx for this metadataTag from the map stored in the mapjoin operator
+      MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag));
+      val.readFields(in);      
+      obj = (ArrayList<Object>)ctx.getDeserializer().deserialize(val);
+    } catch (Exception e) {
+      throw new IOException(e.getMessage());
+    }
+  }
+  
+  @Override
+  public void writeExternal(ObjectOutput out) throws IOException {
+    try {
+      out.writeInt(metadataTag);
+
+      // get the MapJoinObjectCtx for this metadataTag from the map stored in the mapjoin operator
+      MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag));
+
+      // serialize the key with the serializer and object inspector taken from the context
+      Writable outVal = ctx.getSerializer().serialize(obj, ctx.getSerObjInspector());
+      outVal.write(out);
+    }
+    catch (Exception e) {
+      throw new IOException(e.getMessage());
+    }
+  }
+
+  /**
+   * @return the metadataTag
+   */
+  public int getMetadataTag() {
+    return metadataTag;
+  }
+
+  /**
+   * @param metadataTag the metadataTag to set
+   */
+  public void setMetadataTag(int metadataTag) {
+    this.metadataTag = metadataTag;
+  }
+
+  /**
+   * @return the obj
+   */
+  public ArrayList<Object> getObj() {
+    return obj;
+  }
+
+  /**
+   * @param obj the obj to set
+   */
+  public void setObj(ArrayList<Object> obj) {
+    this.obj = obj;
+  }
+
+}

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java?rev=782541&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinObjectValue.java Mon Jun  8 06:34:15 2009
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Vector;
+
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator.MapJoinObjectCtx;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * Map Join Object used for the value
+ */
+public class MapJoinObjectValue implements Externalizable {
+
+  transient protected int     metadataTag;
+  transient protected Vector<ArrayList<Object>>  obj;
+  transient Writable val;
+
+
+  public MapJoinObjectValue() {
+    val = new Text();
+  }
+
+  /**
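+   * Creates a value object for the given metadata tag.
+   * @param metadataTag tag used to look up the MapJoinObjectCtx in MapJoinOperator.getMapMetadata()
+   * @param obj the rows (each a list of column values) held by this object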
+   */
+  public MapJoinObjectValue(int metadataTag, Vector<ArrayList<Object>> obj) {
+    val = new Text();
+    this.metadataTag = metadataTag;
+    this.obj = obj;
+  }
+  
+  public boolean equals(Object o) {
+    if (o instanceof MapJoinObjectValue) {
+      MapJoinObjectValue mObj = (MapJoinObjectValue)o;
+      if (mObj.getMetadataTag() == metadataTag) {
+        if ((obj == null) && (mObj.getObj() == null))
+          return true;
+        if ((obj != null) && (mObj.getObj() != null) && (mObj.getObj().equals(obj)))
+          return true;
+      }
+    }
+
+    return false;
+  }
+  
+  public int hashCode() {
+    return (obj == null) ? 0 : obj.hashCode();
+  }
+  
+  @Override
+  public void readExternal(ObjectInput in) throws IOException,
+      ClassNotFoundException {
+    try {
+      metadataTag   = in.readInt();
+
+      // get the MapJoinObjectCtx for this metadataTag from the map stored in the mapjoin operator
+      MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag));
+      int sz = in.readInt();
+
+      Vector<ArrayList<Object>> res = new Vector<ArrayList<Object>>();
+      for (int pos = 0; pos < sz; pos++) {
+        ArrayList<Object> memObj = new ArrayList<Object>();
+        val.readFields(in);
+        StructObjectInspector objIns = (StructObjectInspector) ctx
+            .getDeserObjInspector();
+        LazyStruct lazyObj = (LazyStruct) (((LazyObject) ctx.getDeserializer()
+            .deserialize(val)).getObject());
+        List<? extends StructField> listFields = objIns.getAllStructFieldRefs();
+        for (StructField fld : listFields) {
+          memObj.add(objIns.getStructFieldData(lazyObj, fld));
+        }
+
+        res.add(memObj);
+      }
+      obj = res;
+    } catch (Exception e) {
+      throw new IOException(e.getMessage());
+    }
+  }
+  
+  @Override
+  public void writeExternal(ObjectOutput out) throws IOException {
+    try {
+      
+      out.writeInt(metadataTag);
+
+      // get the MapJoinObjectCtx for this metadataTag from the map stored in the mapjoin operator
+      MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(Integer.valueOf(metadataTag));
+
+      // write the number of rows first, then each row serialized with the context's serializer
+      Vector<ArrayList<Object>> v = (Vector<ArrayList<Object>>) obj;
+      out.writeInt(v.size());
+
+      for (int pos = 0; pos < v.size(); pos++) {
+        Writable outVal = ctx.getSerializer().serialize(v.get(pos), ctx.getSerObjInspector());
+        outVal.write(out);
+      }
+    }
+    catch (Exception e) {
+      throw new IOException(e.getMessage());
+    }
+  }
+
+  /**
+   * @return the metadataTag
+   */
+  public int getMetadataTag() {
+    return metadataTag;
+  }
+
+  /**
+   * @param metadataTag the metadataTag to set
+   */
+  public void setMetadataTag(int metadataTag) {
+    this.metadataTag = metadataTag;
+  }
+
+  /**
+   * @return the obj
+   */
+  public Vector<ArrayList<Object>> getObj() {
+    return obj;
+  }
+
+  /**
+   * @param obj the obj to set
+   */
+  public void setObj(Vector<ArrayList<Object>> obj) {
+    this.obj = obj;
+  }
+
+}

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/join36.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/join36.q?rev=782541&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/join36.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/join36.q Mon Jun  8 06:34:15 2009
@@ -0,0 +1,29 @@
+set hive.mapjoin.numrows = 2;
+
+drop table dest_j1;
+drop table tmp1;
+drop table tmp2;
+
+CREATE TABLE tmp1(key INT, cnt INT);
+CREATE TABLE tmp2(key INT, cnt INT);
+CREATE TABLE dest_j1(key INT, value INT, val2 INT);
+
+INSERT OVERWRITE TABLE tmp1
+SELECT key, count(1) from src group by key;
+
+INSERT OVERWRITE TABLE tmp2
+SELECT key, count(1) from src group by key;
+
+EXPLAIN
+INSERT OVERWRITE TABLE dest_j1 
+SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt
+FROM tmp1 x JOIN tmp2 y ON (x.key = y.key);
+
+INSERT OVERWRITE TABLE dest_j1 
+SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt
+FROM tmp1 x JOIN tmp2 y ON (x.key = y.key);
+
+select * from dest_j1 x order by x.key;
+
+drop table dest_j1;
+

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out?rev=782541&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/join36.q.out Mon Jun  8 06:34:15 2009
@@ -0,0 +1,424 @@
+query: drop table dest_j1
+query: drop table tmp1
+query: drop table tmp2
+query: CREATE TABLE tmp1(key INT, cnt INT)
+query: CREATE TABLE tmp2(key INT, cnt INT)
+query: CREATE TABLE dest_j1(key INT, value INT, val2 INT)
+query: INSERT OVERWRITE TABLE tmp1
+SELECT key, count(1) from src group by key
+Input: default/src
+Output: default/tmp1
+query: INSERT OVERWRITE TABLE tmp2
+SELECT key, count(1) from src group by key
+Input: default/src
+Output: default/tmp2
+query: EXPLAIN
+INSERT OVERWRITE TABLE dest_j1 
+SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt
+FROM tmp1 x JOIN tmp2 y ON (x.key = y.key)
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF tmp1 x) (TOK_TABREF tmp2 y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) cnt)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        y 
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {cnt}
+                1 {key} {cnt}
+              keys:
+                0 
+                1 
+              Position of Big Table: 1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            x 
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            x 
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {key} {cnt}
+                    1 {key} {cnt}
+                  keys:
+                    0 
+                    1 
+                  Position of Big Table: 1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/data/users/njain/hive1/hive1/build/ql/tmp/1050118775/10002 
+          Select Operator
+            Select Operator
+              expressions:
+                    expr: 0
+                    type: int
+                    expr: 1
+                    type: int
+                    expr: 3
+                    type: int
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: dest_j1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+            replace: true
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: dest_j1
+
+
+query: INSERT OVERWRITE TABLE dest_j1 
+SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt
+FROM tmp1 x JOIN tmp2 y ON (x.key = y.key)
+Input: default/tmp2
+Input: default/tmp1
+Output: default/dest_j1
+query: select * from dest_j1 x order by x.key
+Input: default/dest_j1
+Output: file:/data/users/njain/hive1/hive1/build/ql/tmp/912828284/10000
+0	3	3
+2	1	1
+4	1	1
+5	3	3
+8	1	1
+9	1	1
+10	1	1
+11	1	1
+12	2	2
+15	2	2
+17	1	1
+18	2	2
+19	1	1
+20	1	1
+24	2	2
+26	2	2
+27	1	1
+28	1	1
+30	1	1
+33	1	1
+34	1	1
+35	3	3
+37	2	2
+41	1	1
+42	2	2
+43	1	1
+44	1	1
+47	1	1
+51	2	2
+53	1	1
+54	1	1
+57	1	1
+58	2	2
+64	1	1
+65	1	1
+66	1	1
+67	2	2
+69	1	1
+70	3	3
+72	2	2
+74	1	1
+76	2	2
+77	1	1
+78	1	1
+80	1	1
+82	1	1
+83	2	2
+84	2	2
+85	1	1
+86	1	1
+87	1	1
+90	3	3
+92	1	1
+95	2	2
+96	1	1
+97	2	2
+98	2	2
+100	2	2
+103	2	2
+104	2	2
+105	1	1
+111	1	1
+113	2	2
+114	1	1
+116	1	1
+118	2	2
+119	3	3
+120	2	2
+125	2	2
+126	1	1
+128	3	3
+129	2	2
+131	1	1
+133	1	1
+134	2	2
+136	1	1
+137	2	2
+138	4	4
+143	1	1
+145	1	1
+146	2	2
+149	2	2
+150	1	1
+152	2	2
+153	1	1
+155	1	1
+156	1	1
+157	1	1
+158	1	1
+160	1	1
+162	1	1
+163	1	1
+164	2	2
+165	2	2
+166	1	1
+167	3	3
+168	1	1
+169	4	4
+170	1	1
+172	2	2
+174	2	2
+175	2	2
+176	2	2
+177	1	1
+178	1	1
+179	2	2
+180	1	1
+181	1	1
+183	1	1
+186	1	1
+187	3	3
+189	1	1
+190	1	1
+191	2	2
+192	1	1
+193	3	3
+194	1	1
+195	2	2
+196	1	1
+197	2	2
+199	3	3
+200	2	2
+201	1	1
+202	1	1
+203	2	2
+205	2	2
+207	2	2
+208	3	3
+209	2	2
+213	2	2
+214	1	1
+216	2	2
+217	2	2
+218	1	1
+219	2	2
+221	2	2
+222	1	1
+223	2	2
+224	2	2
+226	1	1
+228	1	1
+229	2	2
+230	5	5
+233	2	2
+235	1	1
+237	2	2
+238	2	2
+239	2	2
+241	1	1
+242	2	2
+244	1	1
+247	1	1
+248	1	1
+249	1	1
+252	1	1
+255	2	2
+256	2	2
+257	1	1
+258	1	1
+260	1	1
+262	1	1
+263	1	1
+265	2	2
+266	1	1
+272	2	2
+273	3	3
+274	1	1
+275	1	1
+277	4	4
+278	2	2
+280	2	2
+281	2	2
+282	2	2
+283	1	1
+284	1	1
+285	1	1
+286	1	1
+287	1	1
+288	2	2
+289	1	1
+291	1	1
+292	1	1
+296	1	1
+298	3	3
+302	1	1
+305	1	1
+306	1	1
+307	2	2
+308	1	1
+309	2	2
+310	1	1
+311	3	3
+315	1	1
+316	3	3
+317	2	2
+318	3	3
+321	2	2
+322	2	2
+323	1	1
+325	2	2
+327	3	3
+331	2	2
+332	1	1
+333	2	2
+335	1	1
+336	1	1
+338	1	1
+339	1	1
+341	1	1
+342	2	2
+344	2	2
+345	1	1
+348	5	5
+351	1	1
+353	2	2
+356	1	1
+360	1	1
+362	1	1
+364	1	1
+365	1	1
+366	1	1
+367	2	2
+368	1	1
+369	3	3
+373	1	1
+374	1	1
+375	1	1
+377	1	1
+378	1	1
+379	1	1
+382	2	2
+384	3	3
+386	1	1
+389	1	1
+392	1	1
+393	1	1
+394	1	1
+395	2	2
+396	3	3
+397	2	2
+399	2	2
+400	1	1
+401	5	5
+402	1	1
+403	3	3
+404	2	2
+406	4	4
+407	1	1
+409	3	3
+411	1	1
+413	2	2
+414	2	2
+417	3	3
+418	1	1
+419	1	1
+421	1	1
+424	2	2
+427	1	1
+429	2	2
+430	3	3
+431	3	3
+432	1	1
+435	1	1
+436	1	1
+437	1	1
+438	3	3
+439	2	2
+443	1	1
+444	1	1
+446	1	1
+448	1	1
+449	1	1
+452	1	1
+453	1	1
+454	3	3
+455	1	1
+457	1	1
+458	2	2
+459	2	2
+460	1	1
+462	2	2
+463	2	2
+466	3	3
+467	1	1
+468	4	4
+469	5	5
+470	1	1
+472	1	1
+475	1	1
+477	1	1
+478	2	2
+479	1	1
+480	3	3
+481	1	1
+482	1	1
+483	1	1
+484	1	1
+485	1	1
+487	1	1
+489	4	4
+490	1	1
+491	1	1
+492	2	2
+493	1	1
+494	1	1
+495	1	1
+496	1	1
+497	1	1
+498	3	3
+query: drop table dest_j1