You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2013/09/18 17:53:01 UTC

svn commit: r1524466 - in /pig/trunk: CHANGES.txt src/org/apache/pig/impl/util/HashOutputStream.java src/org/apache/pig/newplan/logical/relational/LogicalPlan.java

Author: rohini
Date: Wed Sep 18 15:53:01 2013
New Revision: 1524466

URL: http://svn.apache.org/r1524466
Log:
PIG-3455: Pig 0.11.1 OutOfMemory error (rohini)

Added:
    pig/trunk/src/org/apache/pig/impl/util/HashOutputStream.java
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1524466&r1=1524465&r2=1524466&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Sep 18 15:53:01 2013
@@ -246,10 +246,6 @@ PIG-3374: CASE and IN fail when expressi
 
 PIG-2606: union/ join operations are not accepting same alias as multiple inputs (hsubramaniyan via daijy)
 
-PIG-3435: Custom Partitioner not working with MultiQueryOptimizer (knoguchi via daijy)
-
-PIG-3385: DISTINCT no longer uses custom partitioner (knoguchi via daijy)
-
 PIG-3379: Alias reuse in nested foreach causes PIG script to fail (xuefuz via daijy)
 
 PIG-3432: typo in log message in SchemaTupleFrontend (epishkin via cheolsoo)
@@ -382,8 +378,6 @@ PIG-3172: Partition filter push down doe
 
 PIG-3205: Passing arguments to python script does not work with -f option (rohini)
 
-PIG-2507: Semicolon in paramenters for UDF results in parsing error (tnachen via daijy)
-
 PIG-3239: Unable to return multiple values from a macro using SPLIT (dreambird via cheolsoo)
 
 PIG-3077: TestMultiQueryLocal should not write in /tmp (dreambird via cheolsoo)
@@ -483,6 +477,14 @@ PIG-2769: a simple logic causes very lon
 
 BUG FIXES
 
+PIG-3455: Pig 0.11.1 OutOfMemory error (rohini)
+
+PIG-3435: Custom Partitioner not working with MultiQueryOptimizer (knoguchi via daijy)
+
+PIG-3385: DISTINCT no longer uses custom partitioner (knoguchi via daijy)
+
+PIG-2507: Semicolon in paramenters for UDF results in parsing error (tnachen via daijy)
+
 PIG-3341: Strict datetime parsing and improve performance of loading datetime values (rohini)
 
 PIG-3329: RANK operator failed when working with SPLIT (xalan via cheolsoo)

Added: pig/trunk/src/org/apache/pig/impl/util/HashOutputStream.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/HashOutputStream.java?rev=1524466&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/HashOutputStream.java (added)
+++ pig/trunk/src/org/apache/pig/impl/util/HashOutputStream.java Wed Sep 18 15:53:01 2013
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.impl.util;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hasher;
+
+/** OutputStream that feeds written bytes into a Guava {@link Hasher} instead of storing them. */
+public class HashOutputStream extends OutputStream {
+    private final Hasher hasher;
+    public HashOutputStream(HashFunction hf) {
+        hasher = hf.newHasher();
+    }
+
+    /** Hashes only the low-order byte of b; OutputStream.write(int) ignores the upper 24 bits. */
+    @Override
+    public void write(int b) throws IOException {
+        hasher.putByte((byte) b);
+    }
+
+    /** Returns the hash of all bytes written so far; call once, after writing is complete. */
+    public HashCode getHashCode() {
+        return hasher.hash();
+    }
+}

Modified: pig/trunk/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java?rev=1524466&r1=1524465&r2=1524466&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java Wed Sep 18 15:53:01 2013
@@ -18,40 +18,43 @@
 
 package org.apache.pig.newplan.logical.relational;
 
-import java.io.ByteArrayOutputStream;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 
 import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.util.HashOutputStream;
 import org.apache.pig.newplan.BaseOperatorPlan;
 import org.apache.pig.newplan.Operator;
 import org.apache.pig.newplan.OperatorPlan;
 import org.apache.pig.newplan.logical.DotLOPrinter;
 import org.apache.pig.newplan.logical.optimizer.LogicalPlanPrinter;
 
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
 /**
- * LogicalPlan is the logical view of relational operations Pig will execute 
+ * LogicalPlan is the logical view of relational operations Pig will execute
  * for a given script.  Note that it contains only relational operations.
  * All expressions will be contained in LogicalExpressionPlans inside
  * each relational operator.
  */
 public class LogicalPlan extends BaseOperatorPlan {
-  
+
     public LogicalPlan(LogicalPlan other) {
         // shallow copy constructor
         super(other);
     }
-    
+
     public LogicalPlan() {
         super();
     }
-    
+
     /**
      * Equality is checked by calling equals on every leaf in the plan.  This
-     * assumes that plans are always connected graphs.  It is somewhat 
-     * inefficient since every leaf will test equality all the way to 
+     * assumes that plans are always connected graphs.  It is somewhat
+     * inefficient since every leaf will test equality all the way to
      * every root.  But it is only intended for use in testing, so that
      * should be ok.  Checking predecessors (as opposed to successors) was
      * chosen because splits (which have multiple successors) do not depend
@@ -60,19 +63,19 @@ public class LogicalPlan extends BaseOpe
      * graph has no correctness implications, whereas reversing the inputs
      * of join can.  This method of doing equals will detect predecessors
      * in different orders but not successors in different orders.
-     * It will return false if either plan has non deterministic EvalFunc. 
+     * It will return false if either plan has non deterministic EvalFunc.
      */
     @Override
     public boolean isEqual(OperatorPlan other) throws FrontendException {
         if (other == null || !(other instanceof LogicalPlan)) {
             return false;
         }
-        
-        return super.isEqual(other);   
+
+        return super.isEqual(other);
     }
-    
+
     @Override
-    public void explain(PrintStream ps, String format, boolean verbose) 
+    public void explain(PrintStream ps, String format, boolean verbose)
     throws FrontendException {
         if (format.equals("xml")) {
             ps.println("<logicalPlan>XML Not Supported</logicalPlan>");
@@ -105,7 +108,7 @@ public class LogicalPlan extends BaseOpe
     	        ops.add( op );
     	    }
     	}
-    	
+
     	if( ops.isEmpty() ) {
             return null;
     	} else {
@@ -116,18 +119,21 @@ public class LogicalPlan extends BaseOpe
     /**
      * Returns the signature of the LogicalPlan. The signature is a unique identifier for a given
      * plan generated by a Pig script. The same script run multiple times with the same version of
-     * Pig is guarenteed to produce the same signature, even if the input or output locations differ.
+     * Pig is guaranteed to produce the same signature, even if the input or output locations differ.
      *
      * @return a unique identifier for the logical plan
      * @throws FrontendException if signature can't be computed
      */
     public String getSignature() throws FrontendException {
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        PrintStream ps = new PrintStream(baos);
+
+        // Stream into fixed-seed murmur3 32 bits; goodFastHash is unspecified and may vary per JVM run
+        HashFunction hf = Hashing.murmur3_32();
+        HashOutputStream hos = new HashOutputStream(hf);
+        PrintStream ps = new PrintStream(hos);
 
         LogicalPlanPrinter printer = new LogicalPlanPrinter(this, ps);
         printer.visit();
 
-        return Integer.toString(baos.toString().hashCode());
+        return Integer.toString(hos.getHashCode().asInt());
     }
 }