You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kh...@apache.org on 2015/01/16 20:51:28 UTC

svn commit: r1652509 - in /hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog: common/HCatConstants.java mapreduce/FileOutputCommitterContainer.java

Author: khorgath
Date: Fri Jan 16 19:51:28 2015
New Revision: 1652509

URL: http://svn.apache.org/r1652509
Log:
HIVE-9381 : HCatalog hardcodes maximum append limit to 1000 (Sushanth Sowmyan, reviewed by Daniel Dai)

Modified:
    hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
    hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java

Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java?rev=1652509&r1=1652508&r2=1652509&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java (original)
+++ hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java Fri Jan 16 19:51:28 2015
@@ -96,6 +96,19 @@ public final class HCatConstants {
   public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS =
     "hcat.desired.partition.num.splits";
 
+  /**
+   * hcat.append.limit allows a hcat user to specify a custom append limit.
+   * By default, while appending to an existing directory, hcat will attempt
+   * to avoid naming clashes and try to append _a_NNN where NNN is a number to
+   * the desired filename to avoid clashes. However, by default, it only tries
+   * for NNN from 1 to 999 before giving up. This can cause an issue for some
+   * tables with an extraordinarily large number of files. Ideally, this should
+   * be fixed by the user changing their usage pattern and doing some manner of
+   * compaction, but in the meanwhile, until they can, setting this parameter
+   * can be used to bump that limit.
+   */
+  public static final String HCAT_APPEND_LIMIT = "hcat.append.limit";
+
   // IMPORTANT IMPORTANT IMPORTANT!!!!!
   //The keys used to store info into the job Configuration.
   //If any new keys are added, the HCatStorer needs to be updated. The HCatStorer

Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java?rev=1652509&r1=1652508&r2=1652509&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java (original)
+++ hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java Fri Jan 16 19:51:28 2015
@@ -75,6 +75,8 @@ class FileOutputCommitterContainer exten
   static final String DYNTEMP_DIR_NAME = "_DYN";
   static final String SCRATCH_DIR_NAME = "_SCRATCH";
   private static final String APPEND_SUFFIX = "_a_";
+  private static final int APPEND_COUNTER_WARN_THRESHOLD = 1000;
+  private final int maxAppendAttempts;
 
   private static final Logger LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class);
   private final boolean dynamicPartitioningUsed;
@@ -112,6 +114,8 @@ class FileOutputCommitterContainer exten
     } else {
       customDynamicLocationUsed = false;
     }
+
+    this.maxAppendAttempts = context.getConfiguration().getInt(HCatConstants.HCAT_APPEND_LIMIT, APPEND_COUNTER_WARN_THRESHOLD);
   }
 
   @Override
@@ -646,19 +650,23 @@ class FileOutputCommitterContainer exten
           filetype = "";
         }
 
-        // Attempt to find COUNTER_MAX possible alternatives to a filename by
+        // Attempt to find maxAppendAttempts possible alternatives to a filename by
         // appending _a_N and seeing if that destination also clashes. If we're
         // still clashing after that, give up.
-        final int COUNTER_MAX = 1000;
         int counter = 1;
-        for (; fs.exists(itemDest) && counter < COUNTER_MAX ; counter++) {
+        for (; fs.exists(itemDest) && counter < maxAppendAttempts; counter++) {
           itemDest = new Path(dest, name + (APPEND_SUFFIX + counter) + filetype);
         }
 
-        if (counter == COUNTER_MAX){
+        if (counter == maxAppendAttempts){
           throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
               "Could not find a unique destination path for move: file = "
                   + file + " , src = " + src + ", dest = " + dest);
+        } else if (counter > APPEND_COUNTER_WARN_THRESHOLD) {
+          LOG.warn("Append job used filename clash counter [" + counter
+              +"] which is greater than warning limit [" + APPEND_COUNTER_WARN_THRESHOLD
+              +"]. Please compact this table so that performance is not impacted."
+              + " Please see HIVE-9381 for details.");
         }
 
       }