You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2014/12/17 01:49:04 UTC

svn commit: r1646120 - in /hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc: JDBCStatsAggregator.java JDBCStatsPublisher.java JDBCStatsSetupConstants.java JDBCStatsUtils.java

Author: sershe
Date: Wed Dec 17 00:49:03 2014
New Revision: 1646120

URL: http://svn.apache.org/r1646120
Log:
HIVE-9108 : Fix for HIVE-8735 is incorrect (stats with long paths) (Sergey Shelukhin, reviewed by Jason Dere)

Modified:
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java?rev=1646120&r1=1646119&r2=1646120&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java Wed Dec 17 00:49:03 2014
@@ -134,7 +134,7 @@ public class JDBCStatsAggregator impleme
       }
     };
 
-    fileID = JDBCStatsUtils.truncateRowId(fileID);
+    JDBCStatsUtils.validateRowId(fileID);
     String keyPrefix = Utilities.escapeSqlLike(fileID) + "%";
     for (int failures = 0;; failures++) {
       try {
@@ -218,7 +218,7 @@ public class JDBCStatsAggregator impleme
     };
     try {
 
-      rowID = JDBCStatsUtils.truncateRowId(rowID);
+      JDBCStatsUtils.validateRowId(rowID);
       String keyPrefix = Utilities.escapeSqlLike(rowID) + "%";
 
       PreparedStatement delStmt = Utilities.prepareWithRetry(conn,

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java?rev=1646120&r1=1646119&r2=1646120&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java Wed Dec 17 00:49:03 2014
@@ -139,10 +139,9 @@ public class JDBCStatsPublisher implemen
           + " stats: " + JDBCStatsUtils.getSupportedStatistics());
       return false;
     }
-    String rowId = JDBCStatsUtils.truncateRowId(fileID);
+    JDBCStatsUtils.validateRowId(fileID);
     if (LOG.isInfoEnabled()) {
-      String truncateSuffix = (rowId != fileID) ? " (from " + fileID + ")" : ""; // object equality
-      LOG.info("Stats publishing for key " + rowId + truncateSuffix);
+      LOG.info("Stats publishing for key " + fileID);
     }
 
     Utilities.SQLCommand<Void> execUpdate = new Utilities.SQLCommand<Void>() {
@@ -157,7 +156,7 @@ public class JDBCStatsPublisher implemen
 
     for (int failures = 0;; failures++) {
       try {
-        insStmt.setString(1, rowId);
+        insStmt.setString(1, fileID);
         for (int i = 0; i < JDBCStatsUtils.getSupportedStatistics().size(); i++) {
           insStmt.setString(i + 2, stats.get(supportedStatistics.get(i)));
         }
@@ -176,10 +175,10 @@ public class JDBCStatsPublisher implemen
             for (i = 0; i < JDBCStatsUtils.getSupportedStatistics().size(); i++) {
               updStmt.setString(i + 1, stats.get(supportedStatistics.get(i)));
             }
-            updStmt.setString(supportedStatistics.size() + 1, rowId);
+            updStmt.setString(supportedStatistics.size() + 1, fileID);
             updStmt.setString(supportedStatistics.size() + 2,
                 stats.get(JDBCStatsUtils.getBasicStat()));
-            updStmt.setString(supportedStatistics.size() + 3, rowId);
+            updStmt.setString(supportedStatistics.size() + 3, fileID);
             Utilities.executeWithRetry(execUpdate, updStmt, waitWindow, maxRetries);
             return true;
           } catch (SQLRecoverableException ue) {
@@ -281,14 +280,36 @@ public class JDBCStatsPublisher implemen
         stmt = conn.createStatement();
         stmt.setQueryTimeout(timeout);
 
+        // TODO: why is this not done using Hive db scripts?
         // Check if the table exists
         DatabaseMetaData dbm = conn.getMetaData();
-        rs = dbm.getTables(null, null, JDBCStatsUtils.getStatTableName(), null);
+        String tableName = JDBCStatsUtils.getStatTableName();
+        rs = dbm.getTables(null, null, tableName, null);
         boolean tblExists = rs.next();
         if (!tblExists) { // Table does not exist, create it
           String createTable = JDBCStatsUtils.getCreate("");
-          stmt.executeUpdate(createTable);          
-        }      
+          stmt.executeUpdate(createTable);
+        } else {
+          // Widen the ID column to allow for longer paths.
+          String idColName = JDBCStatsUtils.getIdColumnName();
+          int colSize = -1;
+          try {
+            rs.close();
+            rs = dbm.getColumns(null, null, tableName, idColName);
+            if (rs.next()) {
+              colSize = rs.getInt("COLUMN_SIZE");
+              if (colSize < JDBCStatsSetupConstants.ID_COLUMN_VARCHAR_SIZE) {
+                String alterTable = JDBCStatsUtils.getAlterIdColumn();
+                  stmt.executeUpdate(alterTable);
+              }
+            } else {
+              LOG.warn("Failed to update " + idColName + " - column not found");
+            }
+          } catch (Throwable t) {
+            LOG.warn("Failed to update " + idColName + " (size "
+                + (colSize == -1 ? "unknown" : colSize) + ")", t);
+          }
+        }
       }
     } catch (Exception e) {
       LOG.error("Error during JDBC initialization. ", e);

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java?rev=1646120&r1=1646119&r2=1646120&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsSetupConstants.java Wed Dec 17 00:49:03 2014
@@ -34,7 +34,6 @@ public final class JDBCStatsSetupConstan
 
   public static final String PART_STAT_RAW_DATA_SIZE_COLUMN_NAME = "RAW_DATA_SIZE";
 
-  // 255 is an old value that we will keep for now; it can be increased to 4000; limits are
   // MySQL - 65535, SQL Server - 8000, Oracle - 4000, Derby - 32762, Postgres - large.
-  public static final int ID_COLUMN_VARCHAR_SIZE = 255;
+  public static final int ID_COLUMN_VARCHAR_SIZE = 4000;
 }

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java?rev=1646120&r1=1646119&r2=1646120&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsUtils.java Wed Dec 17 00:49:03 2014
@@ -24,7 +24,6 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.hive.common.StatsSetupConst;
-import org.apache.hadoop.util.hash.MurmurHash;
 
 public class JDBCStatsUtils {
 
@@ -137,6 +136,15 @@ public class JDBCStatsUtils {
   }
 
   /**
+   * Prepares the ALTER TABLE statement that widens the ID column to ID_COLUMN_VARCHAR_SIZE.
+   */
+  public static String getAlterIdColumn() {
+    return "ALTER TABLE " + JDBCStatsUtils.getStatTableName() + " ALTER COLUMN "
+        + JDBCStatsUtils.getIdColumnName() + " VARCHAR("
+        + JDBCStatsSetupConstants.ID_COLUMN_VARCHAR_SIZE + ")";
+  }
+
+  /**
    * Prepares UPDATE statement issued when updating existing statistics
    */
   public static String getUpdate(String comment) {
@@ -195,11 +203,10 @@ public class JDBCStatsUtils {
 
   /**
    * Make sure the row ID fits into the row ID column in the table.
-   * @param rowId Row ID.
-   * @return Resulting row ID truncated to correct length, if necessary.
    */
-  public static String truncateRowId(String rowId) {
-    return (rowId.length() <= JDBCStatsSetupConstants.ID_COLUMN_VARCHAR_SIZE)
-        ? rowId : Integer.toHexString(MurmurHash.getInstance().hash(rowId.getBytes()));
+  public static void validateRowId(String rowId) {
+    if (rowId.length() > JDBCStatsSetupConstants.ID_COLUMN_VARCHAR_SIZE) {
+      throw new RuntimeException("ID is too big, client should have truncated it: " + rowId);
+    }
   }
 }