You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/10/06 07:23:36 UTC

svn commit: r1394928 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/Utilities.java test/queries/clientpositive/smb_mapjoin_11.q test/results/clientpositive/smb_mapjoin_11.q.out

Author: namit
Date: Sat Oct  6 05:23:35 2012
New Revision: 1394928

URL: http://svn.apache.org/viewvc?rev=1394928&view=rev
Log:
HIVE-3536 Output of sort merge join is no longer bucketed
(Kevin Wilfong via namit)


Added:
    hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_11.q
    hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1394928&r1=1394927&r2=1394928&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Sat Oct  6 05:23:35 2012
@@ -1158,15 +1158,22 @@ public final class Utilities {
    * return an integer only - this should match a pure integer as well. {1,3} is used to limit
    * matching for attempts #'s 0-999.
    */
-  private static Pattern fileNameTaskIdRegex = Pattern.compile("^.*?([0-9]+)(_[0-9]{1,3})?(\\..*)?$");
+  private static final Pattern FILE_NAME_TO_TASK_ID_REGEX =
+      Pattern.compile("^.*?([0-9]+)(_[0-9]{1,3})?(\\..*)?$");
 
   /**
    * This returns prefix part + taskID for bucket join for partitioned table
    */
-  private static Pattern fileNamePrefixedTaskIdRegex =
+  private static final Pattern FILE_NAME_PREFIXED_TASK_ID_REGEX =
       Pattern.compile("^.*?((\\(.*\\))?[0-9]+)(_[0-9]{1,3})?(\\..*)?$");
 
   /**
+   * This breaks a prefixed bucket number into the prefix and the taskID
+   */
+  private static final Pattern PREFIXED_TASK_ID_REGEX =
+      Pattern.compile("^(.*?\\(.*\\))?([0-9]+)$");
+
+  /**
    * Get the task id from the filename. It is assumed that the filename is derived from the output
    * of getTaskId
    *
@@ -1174,7 +1181,7 @@ public final class Utilities {
    *          filename to extract taskid from
    */
   public static String getTaskIdFromFilename(String filename) {
-    return getIdFromFilename(filename, fileNameTaskIdRegex);
+    return getIdFromFilename(filename, FILE_NAME_TO_TASK_ID_REGEX);
   }
 
   /**
@@ -1185,7 +1192,7 @@ public final class Utilities {
    *          filename to extract taskid from
    */
   public static String getPrefixedTaskIdFromFilename(String filename) {
-    return getIdFromFilename(filename, fileNamePrefixedTaskIdRegex);
+    return getIdFromFilename(filename, FILE_NAME_PREFIXED_TASK_ID_REGEX);
   }
 
   private static String getIdFromFilename(String filename, Pattern pattern) {
@@ -1228,14 +1235,41 @@ public final class Utilities {
     return replaceTaskId(taskId, String.valueOf(bucketNum));
   }
 
+  /**
+   * Returns strBucketNum with enough 0's prefixing the task ID portion of the String to make it
+   * equal in length to taskId
+   *
+   * @param taskId - the taskId used as a template for length
+   * @param strBucketNum - the bucket number of the output, may or may not be prefixed
+   * @return strBucketNum, any prefix kept, with its numeric part zero-padded to taskId's length
+   */
   private static String replaceTaskId(String taskId, String strBucketNum) {
-    int bucketNumLen = strBucketNum.length();
+    Matcher m = PREFIXED_TASK_ID_REGEX.matcher(strBucketNum);
+    if (!m.matches()) {
+      LOG.warn("Unable to determine bucket number from file ID: " + strBucketNum + ". Using " +
+          "file ID as bucket number.");
+      return adjustBucketNumLen(strBucketNum, taskId);
+    } else {
+      String adjustedBucketNum = adjustBucketNumLen(m.group(2), taskId);
+      return (m.group(1) == null ? "" : m.group(1)) + adjustedBucketNum;
+    }
+  }
+
+  /**
+   * Adds 0's to the beginning of bucketNum until bucketNum and taskId are the same length.
+   *
+   * @param bucketNum - the bucket number, should not be prefixed
+   * @param taskId - the taskId used as a template for length
+   * @return bucketNum left-padded with 0's to taskId's length, or unchanged if already as long
+   */
+  private static String adjustBucketNumLen(String bucketNum, String taskId) {
+    int bucketNumLen = bucketNum.length();
     int taskIdLen = taskId.length();
     StringBuffer s = new StringBuffer();
     for (int i = 0; i < taskIdLen - bucketNumLen; i++) {
       s.append("0");
     }
-    return s.toString() + strBucketNum;
+    return s.toString() + bucketNum;
   }
 
   /**

Added: hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_11.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_11.q?rev=1394928&view=auto
==============================================================================
    (empty)

Added: hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out?rev=1394928&view=auto
==============================================================================
    (empty)