You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kh...@apache.org on 2015/01/16 20:51:28 UTC
svn commit: r1652509 - in
/hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog:
common/HCatConstants.java mapreduce/FileOutputCommitterContainer.java
Author: khorgath
Date: Fri Jan 16 19:51:28 2015
New Revision: 1652509
URL: http://svn.apache.org/r1652509
Log:
HIVE-9381 : HCatalog hardcodes maximum append limit to 1000 (Sushanth Sowmyan, reviewed by Daniel Dai)
Modified:
hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java?rev=1652509&r1=1652508&r2=1652509&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java (original)
+++ hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java Fri Jan 16 19:51:28 2015
@@ -96,6 +96,19 @@ public final class HCatConstants {
public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS =
"hcat.desired.partition.num.splits";
+ /**
+ * hcat.append.limit allows an hcat user to specify a custom append limit.
+ * While appending to an existing directory, hcat attempts to avoid naming
+ * clashes by appending _a_NNN, where NNN is a number, to the desired
+ * filename. However, by default, it only tries NNN from 1 to 999 before
+ * giving up. This can cause an issue for some tables with an
+ * extraordinarily large number of files. Ideally, this should be fixed by
+ * the user changing their usage pattern and doing some manner of
+ * compaction, but in the meanwhile, until they can, setting this parameter
+ * can be used to bump that limit.
+ */
+ public static final String HCAT_APPEND_LIMIT = "hcat.append.limit";
+
// IMPORTANT IMPORTANT IMPORTANT!!!!!
//The keys used to store info into the job Configuration.
//If any new keys are added, the HCatStorer needs to be updated. The HCatStorer
Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java?rev=1652509&r1=1652508&r2=1652509&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java (original)
+++ hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java Fri Jan 16 19:51:28 2015
@@ -75,6 +75,8 @@ class FileOutputCommitterContainer exten
static final String DYNTEMP_DIR_NAME = "_DYN";
static final String SCRATCH_DIR_NAME = "_SCRATCH";
private static final String APPEND_SUFFIX = "_a_";
+ private static final int APPEND_COUNTER_WARN_THRESHOLD = 1000;
+ private final int maxAppendAttempts;
private static final Logger LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class);
private final boolean dynamicPartitioningUsed;
@@ -112,6 +114,8 @@ class FileOutputCommitterContainer exten
} else {
customDynamicLocationUsed = false;
}
+
+ this.maxAppendAttempts = context.getConfiguration().getInt(HCatConstants.HCAT_APPEND_LIMIT, APPEND_COUNTER_WARN_THRESHOLD);
}
@Override
@@ -646,19 +650,23 @@ class FileOutputCommitterContainer exten
filetype = "";
}
- // Attempt to find COUNTER_MAX possible alternatives to a filename by
+ // Attempt to find maxAppendAttempts possible alternatives to a filename by
// appending _a_N and seeing if that destination also clashes. If we're
// still clashing after that, give up.
- final int COUNTER_MAX = 1000;
int counter = 1;
- for (; fs.exists(itemDest) && counter < COUNTER_MAX ; counter++) {
+ for (; fs.exists(itemDest) && counter < maxAppendAttempts; counter++) {
itemDest = new Path(dest, name + (APPEND_SUFFIX + counter) + filetype);
}
- if (counter == COUNTER_MAX){
+ if (counter == maxAppendAttempts){
throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
"Could not find a unique destination path for move: file = "
+ file + " , src = " + src + ", dest = " + dest);
+ } else if (counter > APPEND_COUNTER_WARN_THRESHOLD) {
+ LOG.warn("Append job used filename clash counter [" + counter
+ +"] which is greater than warning limit [" + APPEND_COUNTER_WARN_THRESHOLD
+ +"]. Please compact this table so that performance is not impacted."
+ + " Please see HIVE-9381 for details.");
}
}