You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2013/09/18 17:53:01 UTC
svn commit: r1524466 - in /pig/trunk: CHANGES.txt
src/org/apache/pig/impl/util/HashOutputStream.java
src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
Author: rohini
Date: Wed Sep 18 15:53:01 2013
New Revision: 1524466
URL: http://svn.apache.org/r1524466
Log:
PIG-3455: Pig 0.11.1 OutOfMemory error (rohini)
Added:
pig/trunk/src/org/apache/pig/impl/util/HashOutputStream.java
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1524466&r1=1524465&r2=1524466&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Sep 18 15:53:01 2013
@@ -246,10 +246,6 @@ PIG-3374: CASE and IN fail when expressi
PIG-2606: union/ join operations are not accepting same alias as multiple inputs (hsubramaniyan via daijy)
-PIG-3435: Custom Partitioner not working with MultiQueryOptimizer (knoguchi via daijy)
-
-PIG-3385: DISTINCT no longer uses custom partitioner (knoguchi via daijy)
-
PIG-3379: Alias reuse in nested foreach causes PIG script to fail (xuefuz via daijy)
PIG-3432: typo in log message in SchemaTupleFrontend (epishkin via cheolsoo)
@@ -382,8 +378,6 @@ PIG-3172: Partition filter push down doe
PIG-3205: Passing arguments to python script does not work with -f option (rohini)
-PIG-2507: Semicolon in paramenters for UDF results in parsing error (tnachen via daijy)
-
PIG-3239: Unable to return multiple values from a macro using SPLIT (dreambird via cheolsoo)
PIG-3077: TestMultiQueryLocal should not write in /tmp (dreambird via cheolsoo)
@@ -483,6 +477,14 @@ PIG-2769: a simple logic causes very lon
BUG FIXES
+PIG-3455: Pig 0.11.1 OutOfMemory error (rohini)
+
+PIG-3435: Custom Partitioner not working with MultiQueryOptimizer (knoguchi via daijy)
+
+PIG-3385: DISTINCT no longer uses custom partitioner (knoguchi via daijy)
+
+PIG-2507: Semicolon in paramenters for UDF results in parsing error (tnachen via daijy)
+
PIG-3341: Strict datetime parsing and improve performance of loading datetime values (rohini)
PIG-3329: RANK operator failed when working with SPLIT (xalan via cheolsoo)
Added: pig/trunk/src/org/apache/pig/impl/util/HashOutputStream.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/HashOutputStream.java?rev=1524466&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/HashOutputStream.java (added)
+++ pig/trunk/src/org/apache/pig/impl/util/HashOutputStream.java Wed Sep 18 15:53:01 2013
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.impl.util;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hasher;
+
+public class HashOutputStream extends OutputStream { // OutputStream that feeds written data to a Guava Hasher rather than storing it
+ // Hasher that accumulates everything passed to write(int); assigned once in the constructor.
+ private Hasher hasher;
+ // Creates a stream backed by a fresh hasher obtained from hf.newHasher().
+ public HashOutputStream(HashFunction hf) {
+ hasher = hf.newHasher();
+ }
+ // Adds one written value to the running hash; nothing is buffered or forwarded.
+ @Override
+ public void write(int b) throws IOException {
+ hasher.putInt(b); // NOTE(review): hashes the whole int, while OutputStream.write(int) is specified on the low-order 8 bits - confirm intended
+ }
+ // Returns the hash of the data written so far, as computed by hasher.hash().
+ public HashCode getHashCode() {
+ return hasher.hash(); // NOTE(review): Guava documents Hasher.hash() as single-use - presumably call this once per stream; confirm
+ }
+
+}
Modified: pig/trunk/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java?rev=1524466&r1=1524465&r2=1524466&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java Wed Sep 18 15:53:01 2013
@@ -18,40 +18,43 @@
package org.apache.pig.newplan.logical.relational;
-import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.util.HashOutputStream;
import org.apache.pig.newplan.BaseOperatorPlan;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.logical.DotLOPrinter;
import org.apache.pig.newplan.logical.optimizer.LogicalPlanPrinter;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
/**
- * LogicalPlan is the logical view of relational operations Pig will execute
+ * LogicalPlan is the logical view of relational operations Pig will execute
* for a given script. Note that it contains only relational operations.
* All expressions will be contained in LogicalExpressionPlans inside
* each relational operator.
*/
public class LogicalPlan extends BaseOperatorPlan {
-
+
public LogicalPlan(LogicalPlan other) {
// shallow copy constructor
super(other);
}
-
+
public LogicalPlan() {
super();
}
-
+
/**
* Equality is checked by calling equals on every leaf in the plan. This
- * assumes that plans are always connected graphs. It is somewhat
- * inefficient since every leaf will test equality all the way to
+ * assumes that plans are always connected graphs. It is somewhat
+ * inefficient since every leaf will test equality all the way to
* every root. But it is only intended for use in testing, so that
* should be ok. Checking predecessors (as opposed to successors) was
* chosen because splits (which have multiple successors) do not depend
@@ -60,19 +63,19 @@ public class LogicalPlan extends BaseOpe
* graph has no correctness implications, whereas reversing the inputs
* of join can. This method of doing equals will detect predecessors
* in different orders but not successors in different orders.
- * It will return false if either plan has non deterministic EvalFunc.
+ * It will return false if either plan has non deterministic EvalFunc.
*/
@Override
public boolean isEqual(OperatorPlan other) throws FrontendException {
if (other == null || !(other instanceof LogicalPlan)) {
return false;
}
-
- return super.isEqual(other);
+
+ return super.isEqual(other);
}
-
+
@Override
- public void explain(PrintStream ps, String format, boolean verbose)
+ public void explain(PrintStream ps, String format, boolean verbose)
throws FrontendException {
if (format.equals("xml")) {
ps.println("<logicalPlan>XML Not Supported</logicalPlan>");
@@ -105,7 +108,7 @@ public class LogicalPlan extends BaseOpe
ops.add( op );
}
}
-
+
if( ops.isEmpty() ) {
return null;
} else {
@@ -116,18 +119,21 @@ public class LogicalPlan extends BaseOpe
/**
* Returns the signature of the LogicalPlan. The signature is a unique identifier for a given
* plan generated by a Pig script. The same script run multiple times with the same version of
- * Pig is guarenteed to produce the same signature, even if the input or output locations differ.
+ * Pig is guaranteed to produce the same signature, even if the input or output locations differ.
*
* @return a unique identifier for the logical plan
* @throws FrontendException if signature can't be computed
*/
public String getSignature() throws FrontendException {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintStream ps = new PrintStream(baos);
+
+ // Use a streaming hash function. goodFastHash(32) is murmur3 32 bits
+ HashFunction hf = Hashing.goodFastHash(32);
+ HashOutputStream hos = new HashOutputStream(hf);
+ PrintStream ps = new PrintStream(hos);
LogicalPlanPrinter printer = new LogicalPlanPrinter(this, ps);
printer.visit();
- return Integer.toString(baos.toString().hashCode());
+ return Integer.toString(hos.getHashCode().asInt());
}
}