You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2013/10/21 21:34:54 UTC

svn commit: r1534337 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java test/queries/clientnegative/compute_stats_long.q test/results/clientnegative/compute_stats_long.q.out

Author: brock
Date: Mon Oct 21 19:34:53 2013
New Revision: 1534337

URL: http://svn.apache.org/r1534337
Log:
HIVE-4957 - Restrict number of bit vectors to prevent running out of Java heap memory (Shreepadma Venugopalan via Brock Noland)

Added:
    hive/trunk/ql/src/test/queries/clientnegative/compute_stats_long.q
    hive/trunk/ql/src/test/results/clientnegative/compute_stats_long.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java?rev=1534337&r1=1534336&r2=1534337&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java Mon Oct 21 19:34:53 2013
@@ -308,6 +308,7 @@ public class GenericUDAFComputeStats ext
      */
     private transient PrimitiveObjectInspector inputOI;
     private transient PrimitiveObjectInspector numVectorsOI;
+    private final static int MAX_BIT_VECTORS = 1024;
 
     /* Partial aggregation result returned by TerminatePartial. Partial result is a struct
      * containing a long field named "count".
@@ -477,6 +478,10 @@ public class GenericUDAFComputeStats ext
         if (!emptyTable) {
           numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI);
         }
+        if (numVectors > MAX_BIT_VECTORS) {
+          throw new HiveException("The maximum allowed value for number of bit vectors " +
+            " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors");
+        }
         initNDVEstimator(myagg, numVectors);
         myagg.firstItem = false;
         myagg.numBitVectors = numVectors;
@@ -604,6 +609,7 @@ public class GenericUDAFComputeStats ext
      */
     private transient PrimitiveObjectInspector inputOI;
     private transient PrimitiveObjectInspector numVectorsOI;
+    private final static int MAX_BIT_VECTORS = 1024;
 
     /* Partial aggregation result returned by TerminatePartial. Partial result is a struct
      * containing a long field named "count".
@@ -773,6 +779,12 @@ public class GenericUDAFComputeStats ext
         if (!emptyTable) {
           numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI);
         }
+
+        if (numVectors > MAX_BIT_VECTORS) {
+          throw new HiveException("The maximum allowed value for number of bit vectors " +
+            " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors");
+        }
+
         initNDVEstimator(myagg, numVectors);
         myagg.firstItem = false;
         myagg.numBitVectors = numVectors;
@@ -901,6 +913,7 @@ public class GenericUDAFComputeStats ext
      */
     private transient PrimitiveObjectInspector inputOI;
     private transient PrimitiveObjectInspector numVectorsOI;
+    private final static int MAX_BIT_VECTORS = 1024;
 
     /* Partial aggregation result returned by TerminatePartial. Partial result is a struct
      * containing a long field named "count".
@@ -1081,6 +1094,12 @@ public class GenericUDAFComputeStats ext
         if (!emptyTable) {
           numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI);
         }
+
+        if (numVectors > MAX_BIT_VECTORS) {
+          throw new HiveException("The maximum allowed value for number of bit vectors " +
+            " is " + MAX_BIT_VECTORS + " , but was passed " + numVectors + " bit vectors");
+        }
+
         initNDVEstimator(myagg, numVectors);
         myagg.firstItem = false;
         myagg.numBitVectors = numVectors;

Added: hive/trunk/ql/src/test/queries/clientnegative/compute_stats_long.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/compute_stats_long.q?rev=1534337&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/compute_stats_long.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/compute_stats_long.q Mon Oct 21 19:34:53 2013
@@ -0,0 +1,7 @@
+create table tab_int(a int);
+
+-- insert some data
+LOAD DATA LOCAL INPATH "../data/files/int.txt" INTO TABLE tab_int;
+
+-- compute stats should raise an error since the number of bit vectors > 1024
+select compute_stats(a, 10000) from tab_int;

Added: hive/trunk/ql/src/test/results/clientnegative/compute_stats_long.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/compute_stats_long.q.out?rev=1534337&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/compute_stats_long.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/compute_stats_long.q.out Mon Oct 21 19:34:53 2013
@@ -0,0 +1,29 @@
+PREHOOK: query: create table tab_int(a int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table tab_int(a int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tab_int
+PREHOOK: query: -- insert some data
+LOAD DATA LOCAL INPATH "../data/files/int.txt" INTO TABLE tab_int
+PREHOOK: type: LOAD
+PREHOOK: Output: default@tab_int
+POSTHOOK: query: -- insert some data
+LOAD DATA LOCAL INPATH "../data/files/int.txt" INTO TABLE tab_int
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@tab_int
+PREHOOK: query: -- compute stats should raise an error since the number of bit vectors > 1024
+select compute_stats(a, 10000) from tab_int
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab_int
+#### A masked pattern was here ####
+Execution failed with exit status: 2
+Obtaining error information
+
+Task failed!
+Task ID:
+  Stage-1
+
+Logs:
+
+#### A masked pattern was here ####
+FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask