Posted to commits@hive.apache.org by kg...@apache.org on 2019/07/09 15:46:56 UTC

[hive] branch master updated (12712d5 -> 33d2995)

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git.


    from 12712d5  HIVE-21959: Clean up Concatenate and Msck DDL commands (Miklos Gergely, reviewed by Zoltan Haindrich)
     new b4a688b  HIVE-21938: Add database and table filter options to PreUpgradeTool (Krisztian Kasa via Zoltan Haindrich)
     new fb322b1  HIVE-21967: Clean up CreateTableLikeOperation (Miklos Gergely via Zoltan Haindrich)
     new cd3d71b  HIVE-21547: Temp Tables: Use stORC format for temporary tables (Krisztian Kasa via Gopal V)
     new 33d2995  HIVE-21933: Remove unused methods from Utilities (Ivan Suller via Laszlo Bodor)

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../table/creation/CreateTableLikeOperation.java   | 137 +++++++---------
 .../org/apache/hadoop/hive/ql/exec/Utilities.java  | 155 +-----------------
 .../apache/hadoop/hive/ql/io/AcidOutputFormat.java |  11 ++
 .../hadoop/hive/ql/io/HiveFileFormatUtils.java     |   3 +-
 .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java    |   4 +-
 .../apache/hadoop/hive/ql/exec/TestUtilities.java  |   9 +-
 .../hadoop/hive/upgrade/acid/PreUpgradeTool.java   | 104 +++++++-----
 .../hadoop/hive/upgrade/acid/RunOptions.java       |  83 ++++++++++
 .../hive/upgrade/acid/TestPreUpgradeTool.java      | 181 +++++++++++++++++----
 9 files changed, 391 insertions(+), 296 deletions(-)
 create mode 100644 upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/RunOptions.java


[hive] 02/04: HIVE-21967: Clean up CreateTableLikeOperation (Miklos Gergely via Zoltan Haindrich)

Posted by kg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit fb322b1b42739b842fc8c87ff260015d3e69dd45
Author: Miklos Gergely <mg...@hortonworks.com>
AuthorDate: Tue Jul 9 17:32:47 2019 +0200

    HIVE-21967: Clean up CreateTableLikeOperation (Miklos Gergely via Zoltan Haindrich)
    
    Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
 .../table/creation/CreateTableLikeOperation.java   | 137 ++++++++++-----------
 1 file changed, 62 insertions(+), 75 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/creation/CreateTableLikeOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/creation/CreateTableLikeOperation.java
index 57b756a..4837d44 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/creation/CreateTableLikeOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/creation/CreateTableLikeOperation.java
@@ -56,12 +56,12 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
   @Override
   public int execute() throws HiveException {
     // Get the existing table
-    Table oldtbl = context.getDb().getTable(desc.getLikeTableName());
+    Table oldTable = context.getDb().getTable(desc.getLikeTableName());
     Table tbl;
-    if (oldtbl.getTableType() == TableType.VIRTUAL_VIEW || oldtbl.getTableType() == TableType.MATERIALIZED_VIEW) {
-      tbl = createViewLikeTable(oldtbl);
+    if (oldTable.getTableType() == TableType.VIRTUAL_VIEW || oldTable.getTableType() == TableType.MATERIALIZED_VIEW) {
+      tbl = createViewLikeTable(oldTable);
     } else {
-      tbl = createTableLikeTable(oldtbl);
+      tbl = createTableLikeTable(oldTable);
     }
 
     // If location is specified - ensure that it is a full qualified name
@@ -81,82 +81,75 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
     return 0;
   }
 
-  private Table createViewLikeTable(Table oldtbl) throws HiveException {
-    Table tbl;
-    String targetTableName = desc.getTableName();
-    tbl = context.getDb().newTable(targetTableName);
+  private Table createViewLikeTable(Table oldTable) throws HiveException {
+    Table table = context.getDb().newTable(desc.getTableName());
 
     if (desc.getTblProps() != null) {
-      tbl.getTTable().getParameters().putAll(desc.getTblProps());
+      table.getTTable().getParameters().putAll(desc.getTblProps());
     }
 
-    tbl.setTableType(TableType.MANAGED_TABLE);
+    table.setTableType(TableType.MANAGED_TABLE);
 
     if (desc.isExternal()) {
-      tbl.setProperty("EXTERNAL", "TRUE");
-      tbl.setTableType(TableType.EXTERNAL_TABLE);
-      // if the partition discovery tablproperty is already defined don't change it
-      if (tbl.isPartitioned() && tbl.getProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY) == null) {
-        // partition discovery is on by default if it already doesn't exist
-        tbl.setProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true");
-      }
+      setExternalProperties(table);
     }
 
-    tbl.setFields(oldtbl.getCols());
-    tbl.setPartCols(oldtbl.getPartCols());
-
-    if (desc.getDefaultSerName() == null) {
-      LOG.info("Default to LazySimpleSerDe for table {}", targetTableName);
-      tbl.setSerializationLib(LazySimpleSerDe.class.getName());
-    } else {
-      // let's validate that the serde exists
-      DDLUtils.validateSerDe(desc.getDefaultSerName(), context);
-      tbl.setSerializationLib(desc.getDefaultSerName());
-    }
+    table.setFields(oldTable.getCols());
+    table.setPartCols(oldTable.getPartCols());
 
     if (desc.getDefaultSerdeProps() != null) {
       for (Map.Entry<String, String> e : desc.getDefaultSerdeProps().entrySet()) {
-        tbl.setSerdeParam(e.getKey(), e.getValue());
+        table.setSerdeParam(e.getKey(), e.getValue());
       }
     }
 
-    tbl.setInputFormatClass(desc.getDefaultInputFormat());
-    tbl.setOutputFormatClass(desc.getDefaultOutputFormat());
-    tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
-    tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
+    setStorage(table);
 
-    return tbl;
+    return table;
   }
 
-  private Table createTableLikeTable(Table oldtbl) throws SemanticException, HiveException {
-    Table tbl = oldtbl;
+  private Table createTableLikeTable(Table table) throws SemanticException, HiveException {
+    String[] names = Utilities.getDbTableName(desc.getTableName());
+    table.setDbName(names[0]);
+    table.setTableName(names[1]);
+    table.setOwner(SessionState.getUserFromAuthenticator());
 
-    // find out database name and table name of target table
-    String targetTableName = desc.getTableName();
-    String[] names = Utilities.getDbTableName(targetTableName);
+    if (desc.getLocation() != null) {
+      table.setDataLocation(new Path(desc.getLocation()));
+    } else {
+      table.unsetDataLocation();
+    }
 
-    tbl.setDbName(names[0]);
-    tbl.setTableName(names[1]);
+    setTableParameters(table);
+
+    if (desc.isUserStorageFormat()) {
+      setStorage(table);
+    }
 
-    // using old table object, hence reset the owner to current user for new table.
-    tbl.setOwner(SessionState.getUserFromAuthenticator());
+    table.getTTable().setTemporary(desc.isTemporary());
+    table.getTTable().unsetId();
 
-    if (desc.getLocation() != null) {
-      tbl.setDataLocation(new Path(desc.getLocation()));
+    if (desc.isExternal()) {
+      setExternalProperties(table);
     } else {
-      tbl.unsetDataLocation();
+      table.getParameters().remove("EXTERNAL");
     }
 
+    return table;
+  }
+
+  private void setTableParameters(Table tbl) throws HiveException {
+    Set<String> retainer = new HashSet<String>();
+
     Class<? extends Deserializer> serdeClass;
     try {
-      serdeClass = oldtbl.getDeserializerClass();
+      serdeClass = tbl.getDeserializerClass();
     } catch (Exception e) {
       throw new HiveException(e);
     }
     // We should copy only those table parameters that are specified in the config.
     SerDeSpec spec = AnnotationUtils.getAnnotation(serdeClass, SerDeSpec.class);
 
-    Set<String> retainer = new HashSet<String>();
     // for non-native table, property storage_handler should be retained
     retainer.add(META_TABLE_STORAGE);
     if (spec != null && spec.schemaProps() != null) {
@@ -178,37 +171,31 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
     if (desc.getTblProps() != null) {
       params.putAll(desc.getTblProps());
     }
+  }
 
-    if (desc.isUserStorageFormat()) {
-      tbl.setInputFormatClass(desc.getDefaultInputFormat());
-      tbl.setOutputFormatClass(desc.getDefaultOutputFormat());
-      tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
-      tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
-      if (desc.getDefaultSerName() == null) {
-        LOG.info("Default to LazySimpleSerDe for like table {}", targetTableName);
-        tbl.setSerializationLib(LazySimpleSerDe.class.getName());
-      } else {
-        // let's validate that the serde exists
-        DDLUtils.validateSerDe(desc.getDefaultSerName(), context);
-        tbl.setSerializationLib(desc.getDefaultSerName());
-      }
-    }
-
-    tbl.getTTable().setTemporary(desc.isTemporary());
-    tbl.getTTable().unsetId();
+  private void setStorage(Table table) throws HiveException {
+    table.setInputFormatClass(desc.getDefaultInputFormat());
+    table.setOutputFormatClass(desc.getDefaultOutputFormat());
+    table.getTTable().getSd().setInputFormat(table.getInputFormatClass().getName());
+    table.getTTable().getSd().setOutputFormat(table.getOutputFormatClass().getName());
 
-    if (desc.isExternal()) {
-      tbl.setProperty("EXTERNAL", "TRUE");
-      tbl.setTableType(TableType.EXTERNAL_TABLE);
-      // if the partition discovery tablproperty is already defined don't change it
-      if (tbl.isPartitioned() && tbl.getProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY) == null) {
-        // partition discovery is on by default if it already doesn't exist
-        tbl.setProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true");
-      }
+    if (desc.getDefaultSerName() == null) {
+      LOG.info("Default to LazySimpleSerDe for table {}", desc.getTableName());
+      table.setSerializationLib(LazySimpleSerDe.class.getName());
     } else {
-      tbl.getParameters().remove("EXTERNAL");
+      // let's validate that the serde exists
+      DDLUtils.validateSerDe(desc.getDefaultSerName(), context);
+      table.setSerializationLib(desc.getDefaultSerName());
     }
+  }
 
-    return tbl;
+  private void setExternalProperties(Table tbl) {
+    tbl.setProperty("EXTERNAL", "TRUE");
+    tbl.setTableType(TableType.EXTERNAL_TABLE);
+    // if the partition discovery table property is already defined don't change it
+    if (tbl.isPartitioned() && tbl.getProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY) == null) {
+      // partition discovery is on by default if it already doesn't exist
+      tbl.setProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true");
+    }
   }
 }


[hive] 04/04: HIVE-21933: Remove unused methods from Utilities (Ivan Suller via Laszlo Bodor)

Posted by kg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 33d2995bc2612e0d3e9ad0cfc2a88a9a7f5141bc
Author: Ivan Suller <is...@cloudera.com>
AuthorDate: Tue Jul 9 17:43:09 2019 +0200

    HIVE-21933: Remove unused methods from Utilities (Ivan Suller via Laszlo Bodor)
    
    Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
 .../org/apache/hadoop/hive/ql/exec/Utilities.java  | 155 ++-------------------
 .../apache/hadoop/hive/ql/exec/TestUtilities.java  |   9 +-
 2 files changed, 12 insertions(+), 152 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 68986dd..1346bed 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -404,7 +404,7 @@ public final class Utilities {
   /**
    * Pushes work into the global work map
    */
-  public static void setBaseWork(Configuration conf, String name, BaseWork work) {
+  private static void setBaseWork(Configuration conf, String name, BaseWork work) {
     Path path = getPlanPath(conf, name);
     setHasWork(conf, name);
     gWorkMap.get(conf).put(path, work);
@@ -753,14 +753,6 @@ public final class Utilities {
     }
   }
 
-  public static HashMap makeMap(Object... olist) {
-    HashMap ret = new HashMap();
-    for (int i = 0; i < olist.length; i += 2) {
-      ret.put(olist[i], olist[i + 1]);
-    }
-    return (ret);
-  }
-
   public static Properties makeProperties(String... olist) {
     Properties ret = new Properties();
     for (int i = 0; i < olist.length; i += 2) {
@@ -799,37 +791,11 @@ public final class Utilities {
     return new PartitionDesc(part, tableDesc);
   }
 
-  public static PartitionDesc getPartitionDesc(Partition part) throws HiveException {
-    return new PartitionDesc(part);
-  }
-
   public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part,
     boolean usePartSchemaProperties) throws HiveException {
     return new PartitionDesc(part, tblDesc, usePartSchemaProperties);
   }
 
-  private static String getOpTreeSkel_helper(Operator<?> op, String indent) {
-    if (op == null) {
-      return StringUtils.EMPTY;
-    }
-
-    StringBuilder sb = new StringBuilder();
-    sb.append(indent);
-    sb.append(op.toString());
-    sb.append("\n");
-    if (op.getChildOperators() != null) {
-      for (Object child : op.getChildOperators()) {
-        sb.append(getOpTreeSkel_helper((Operator<?>) child, indent + "  "));
-      }
-    }
-
-    return sb.toString();
-  }
-
-  public static String getOpTreeSkel(Operator<?> op) {
-    return getOpTreeSkel_helper(op, StringUtils.EMPTY);
-  }
-
   private static boolean isWhitespace(int c) {
     if (c == -1) {
       return false;
@@ -926,22 +892,6 @@ public final class Utilities {
   }
 
   /**
-   * Convert an output stream to a compressed output stream based on codecs and compression options
-   * specified in the Job Configuration.
-   *
-   * @param jc
-   *          Job Configuration
-   * @param out
-   *          Output Stream to be converted into compressed output stream
-   * @return compressed output stream
-   */
-  public static OutputStream createCompressedStream(JobConf jc, OutputStream out)
-      throws IOException {
-    boolean isCompressed = FileOutputFormat.getCompressOutput(jc);
-    return createCompressedStream(jc, out, isCompressed);
-  }
-
-  /**
    * Convert an output stream to a compressed output stream based on codecs codecs in the Job
    * Configuration. Caller specifies directly whether file is compressed or not
    *
@@ -966,22 +916,6 @@ public final class Utilities {
   }
 
   /**
-   * Based on compression option and configured output codec - get extension for output file. This
-   * is only required for text files - not sequencefiles
-   *
-   * @param jc
-   *          Job Configuration
-   * @param isCompressed
-   *          Whether the output file is compressed or not
-   * @return the required file extension (example: .gz)
-   * @deprecated Use {@link #getFileExtension(JobConf, boolean, HiveOutputFormat)}
-   */
-  @Deprecated
-  public static String getFileExtension(JobConf jc, boolean isCompressed) {
-    return getFileExtension(jc, isCompressed, new HiveIgnoreKeyTextOutputFormat());
-  }
-
-  /**
    * Based on compression option, output format, and configured output codec -
    * get extension for output file. Text files require an extension, whereas
    * others, like sequence files, do not.
@@ -1014,27 +948,6 @@ public final class Utilities {
   }
 
   /**
-   * Create a sequencefile output stream based on job configuration.
-   *
-   * @param jc
-   *          Job configuration
-   * @param fs
-   *          File System to create file in
-   * @param file
-   *          Path to be created
-   * @param keyClass
-   *          Java Class for key
-   * @param valClass
-   *          Java Class for value
-   * @return output stream over the created sequencefile
-   */
-  public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file,
-      Class<?> keyClass, Class<?> valClass, Progressable progressable) throws IOException {
-    boolean isCompressed = FileOutputFormat.getCompressOutput(jc);
-    return createSequenceWriter(jc, fs, file, keyClass, valClass, isCompressed, progressable);
-  }
-
-  /**
    * Create a sequencefile output stream based on job configuration Uses user supplied compression
    * flag (rather than obtaining it from the Job Configuration).
    *
@@ -1055,11 +968,11 @@ public final class Utilities {
       throws IOException {
     CompressionCodec codec = null;
     CompressionType compressionType = CompressionType.NONE;
-    Class codecClass = null;
+    Class<? extends CompressionCodec> codecClass = null;
     if (isCompressed) {
       compressionType = SequenceFileOutputFormat.getOutputCompressionType(jc);
       codecClass = FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
-      codec = (CompressionCodec) ReflectionUtil.newInstance(codecClass, jc);
+      codec = ReflectionUtil.newInstance(codecClass, jc);
     }
     return SequenceFile.createWriter(fs, jc, file, keyClass, valClass, compressionType, codec,
       progressable);
@@ -1156,7 +1069,7 @@ public final class Utilities {
   /**
    * Detect if the supplied file is a temporary path.
    */
-  public static boolean isTempPath(FileStatus file) {
+  private static boolean isTempPath(FileStatus file) {
     String name = file.getPath().getName();
     // in addition to detecting hive temporary files, we also check hadoop
     // temporary folders that used to show up in older releases
@@ -1181,46 +1094,6 @@ public final class Utilities {
     }
   }
 
-  /**
-   * Moves files from src to dst if it is within the specified set of paths
-   * @param fs
-   * @param src
-   * @param dst
-   * @param filesToMove
-   * @throws IOException
-   * @throws HiveException
-   */
-  private static void moveSpecifiedFiles(FileSystem fs, Path src, Path dst, Set<Path> filesToMove)
-      throws IOException, HiveException {
-    if (!fs.exists(dst)) {
-      fs.mkdirs(dst);
-    }
-
-    FileStatus[] files = fs.listStatus(src);
-    for (FileStatus file : files) {
-      if (filesToMove.contains(file.getPath())) {
-        Utilities.moveFile(fs, file, dst);
-      } else if (file.isDir()) {
-        // Traverse directory contents.
-        // Directory nesting for dst needs to match src.
-        Path nestedDstPath = new Path(dst, file.getPath().getName());
-        Utilities.moveSpecifiedFiles(fs, file.getPath(), nestedDstPath, filesToMove);
-      }
-    }
-  }
-
-  public static void moveSpecifiedFiles(FileSystem fs, Path dst, Set<Path> filesToMove)
-      throws IOException, HiveException {
-    if (!fs.exists(dst)) {
-      fs.mkdirs(dst);
-    }
-
-    for (Path path: filesToMove) {
-      FileStatus fsStatus = fs.getFileStatus(path);
-      Utilities.moveFile(fs, fsStatus, dst);
-    }
-  }
-
   private static void moveFile(FileSystem fs, FileStatus file, Path dst) throws IOException,
       HiveException {
     Path srcFilePath = file.getPath();
@@ -1333,7 +1206,7 @@ public final class Utilities {
    * @param filename
    *          filename to extract taskid from
    */
-  public static String getPrefixedTaskIdFromFilename(String filename) {
+  private static String getPrefixedTaskIdFromFilename(String filename) {
     return getTaskIdFromFilename(filename, FILE_NAME_PREFIXED_TASK_ID_REGEX);
   }
 
@@ -1341,7 +1214,7 @@ public final class Utilities {
     return getIdFromFilename(filename, pattern, 1);
   }
 
-  public static int getAttemptIdFromFilename(String filename) {
+  private static int getAttemptIdFromFilename(String filename) {
     String attemptStr = getIdFromFilename(filename, FILE_NAME_PREFIXED_TASK_ID_REGEX, 3);
     return Integer.parseInt(attemptStr.substring(1));
   }
@@ -1364,14 +1237,6 @@ public final class Utilities {
     return taskId;
   }
 
-  public static String getFileNameFromDirName(String dirName) {
-    int dirEnd = dirName.lastIndexOf(Path.SEPARATOR);
-    if (dirEnd != -1) {
-      return dirName.substring(dirEnd + 1);
-    }
-    return dirName;
-  }
-
   /**
    * Replace the task id from the filename. It is assumed that the filename is derived from the
    * output of getTaskId
@@ -1481,7 +1346,7 @@ public final class Utilities {
   }
 
 
-  public static boolean shouldAvoidRename(FileSinkDesc conf, Configuration hConf) {
+  private static boolean shouldAvoidRename(FileSinkDesc conf, Configuration hConf) {
     // we are avoiding rename/move only if following conditions are met
     //  * execution engine is tez
     //  * query cache is disabled
@@ -1647,7 +1512,7 @@ public final class Utilities {
     return removeTempOrDuplicateFiles(fs, stats, dpCtx, conf, hconf, isBaseDir);
   }
 
-  public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats,
+  private static List<Path> removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats,
       DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf, boolean isBaseDir) throws IOException {
     return removeTempOrDuplicateFiles(fs, fileStats, dpCtx, conf, hconf, null, isBaseDir);
   }
@@ -1657,7 +1522,7 @@ public final class Utilities {
    *
    * @return a list of path names corresponding to should-be-created empty buckets.
    */
-  public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats,
+  private static List<Path> removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats,
       DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf, Set<FileStatus> filesKept, boolean isBaseDir)
           throws IOException {
     int dpLevels = dpCtx == null ? 0 : dpCtx.getNumDPCols(),
@@ -2711,7 +2576,6 @@ public final class Utilities {
     }
   }
 
-  @SuppressWarnings("unchecked")
   private static <T> List<T> getTasks(List<Task<? extends Serializable>> tasks,
       TaskFilterFunction<T> function) {
     DAGTraversal.traverse(tasks, function);
@@ -3665,7 +3529,6 @@ public final class Utilities {
    * @param rWork Used to find FileSinkOperators
    * @throws IOException
    */
-  @SuppressWarnings("unchecked")
   public static void createTmpDirs(Configuration conf, ReduceWork rWork)
       throws IOException {
     if (rWork == null) {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
index dc851c0..fbf948c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
@@ -101,10 +101,6 @@ public class TestUtilities {
         getFileExtension(jc, false, new HiveIgnoreKeyTextOutputFormat()));
     assertEquals("Deflate for uncompressed text format", ".deflate",
         getFileExtension(jc, true, new HiveIgnoreKeyTextOutputFormat()));
-    assertEquals("No extension for uncompressed default format", "",
-        getFileExtension(jc, false));
-    assertEquals("Deflate for uncompressed default format", ".deflate",
-        getFileExtension(jc, true));
 
     String extension = ".myext";
     jc.set("hive.output.file.extension", extension);
@@ -349,7 +345,7 @@ public class TestUtilities {
       String testPartitionName = "p=" + i;
       testPartitionsPaths[i] = new Path(testTablePath, "p=" + i);
       mapWork.getPathToAliases().put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
-      mapWork.getAliasToWork().put(testPartitionName, (Operator<?>) mock(Operator.class));
+      mapWork.getAliasToWork().put(testPartitionName, mock(Operator.class));
       mapWork.getPathToPartitionInfo().put(testPartitionsPaths[i], mockPartitionDesc);
 
     }
@@ -501,7 +497,7 @@ public class TestUtilities {
 
       pathToAliasTable.put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
 
-      mapWork.getAliasToWork().put(testPartitionName, (Operator<?>) mock(Operator.class));
+      mapWork.getAliasToWork().put(testPartitionName, mock(Operator.class));
     }
 
     mapWork.setPathToAliases(pathToAliasTable);
@@ -638,6 +634,7 @@ public class TestUtilities {
       super.addDependentTask(wrappedDep);
     }
 
+    @Override
     public boolean addDependentTask(Task<? extends Serializable> dependent) {
       return wrappedDep.addDependentTask(dependent);
     }
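
For readers skimming the hunks above: with the deprecated two-argument getFileExtension overload gone, callers always name the output format explicitly, which is exactly what the updated test now does. A minimal, self-contained sketch of the surviving call (the wrapper class and main method are illustrative, not part of the commit):

    import org.apache.hadoop.hive.ql.exec.Utilities;
    import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class FileExtensionSketch {
      public static void main(String[] args) {
        JobConf jc = new JobConf();
        // With compression requested and the default codec, text output gets ".deflate",
        // matching the assertion kept in TestUtilities above.
        String ext = Utilities.getFileExtension(jc, true, new HiveIgnoreKeyTextOutputFormat());
        System.out.println("extension: " + ext);
      }
    }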


[hive] 01/04: HIVE-21938: Add database and table filter options to PreUpgradeTool (Krisztian Kasa via Zoltan Haindrich)

Posted by kg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit b4a688b00738068bf27869791afbaaee4a05c396
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Tue Jul 9 17:30:51 2019 +0200

    HIVE-21938: Add database and table filter options to PreUpgradeTool (Krisztian Kasa via Zoltan Haindrich)
    
    Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
 .../hadoop/hive/upgrade/acid/PreUpgradeTool.java   | 104 +++++++-----
 .../hadoop/hive/upgrade/acid/RunOptions.java       |  83 ++++++++++
 .../hive/upgrade/acid/TestPreUpgradeTool.java      | 181 +++++++++++++++++----
 3 files changed, 302 insertions(+), 66 deletions(-)

diff --git a/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/PreUpgradeTool.java b/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/PreUpgradeTool.java
index 0e3e3e2..0a7354d 100644
--- a/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/PreUpgradeTool.java
+++ b/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/PreUpgradeTool.java
@@ -17,7 +17,23 @@
  */
 package org.apache.hadoop.hive.upgrade.acid;
 
-import com.google.common.annotations.VisibleForTesting;
+import static org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.escapeSQLString;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLineParser;
 import org.apache.commons.cli.GnuParser;
@@ -38,6 +54,7 @@ import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.RetryingMetaStoreClient;
+import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.CompactionResponse;
 import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -49,10 +66,10 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.io.orc.Reader;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.security.AccessControlException;
@@ -62,20 +79,7 @@ import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.Charset;
-import java.nio.charset.CharsetDecoder;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import static org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.escapeSQLString;
+import com.google.common.annotations.VisibleForTesting;
 
 /**
  * This utility is designed to help with upgrading Hive 2.x to Hive 3.0.  On-disk layout for
@@ -122,8 +126,6 @@ public class PreUpgradeTool {
     tool.init();
     CommandLineParser parser = new GnuParser();
     CommandLine line ;
-    String outputDir = ".";
-    boolean execute = false;
     try {
       line = parser.parse(tool.cmdLineOptions, args);
     } catch (ParseException e) {
@@ -136,13 +138,8 @@ public class PreUpgradeTool {
       formatter.printHelp("upgrade-acid", tool.cmdLineOptions);
       return;
     }
-    if(line.hasOption("location")) {
-      outputDir = line.getOptionValue("location");
-    }
-    if(line.hasOption("execute")) {
-      execute = true;
-    }
-    LOG.info("Starting with execute=" + execute + ", location=" + outputDir);
+    RunOptions runOptions = RunOptions.fromCommandLine(line);
+    LOG.info("Starting with " + runOptions.toString());
 
     try {
       String hiveVer = HiveVersionInfo.getShortVersion();
@@ -151,7 +148,7 @@ public class PreUpgradeTool {
       if(!hiveVer.startsWith("2.")) {
         throw new IllegalStateException("preUpgrade requires Hive 2.x.  Actual: " + hiveVer);
       }
-      tool.prepareAcidUpgradeInternal(outputDir, execute);
+      tool.prepareAcidUpgradeInternal(runOptions);
     }
     catch(Exception ex) {
       LOG.error("PreUpgradeTool failed", ex);
@@ -172,8 +169,33 @@ public class PreUpgradeTool {
           "Executes commands equivalent to generated scrips");
       exec.setOptionalArg(true);
       cmdLineOptions.addOption(exec);
-      cmdLineOptions.addOption(new Option("location", true,
-          "Location to write scripts to. Default is CWD."));
+      Option locationOption = new Option("location", true,
+              "Location to write scripts to. Default is CWD.");
+      locationOption.setArgName("path of directory");
+      cmdLineOptions.addOption(locationOption);
+
+      Option dbRegexOption = new Option("d",
+              "Regular expression to match database names on which this tool will be run. Default: all databases");
+      dbRegexOption.setLongOpt("dbRegex");
+      dbRegexOption.setArgs(1);
+      dbRegexOption.setArgName("regex");
+      cmdLineOptions.addOption(dbRegexOption);
+
+      Option tableRegexOption = new Option("t",
+              "Regular expression to match table names on which this tool will be run. Default: all tables");
+      tableRegexOption.setLongOpt("tableRegex");
+      tableRegexOption.setArgs(1);
+      tableRegexOption.setArgName("regex");
+      cmdLineOptions.addOption(tableRegexOption);
+
+      Option tableTypeOption = new Option("tt",
+              String.format("Table type to match tables on which this tool will be run. Possible values: %s " +
+                      "Default: all tables",
+                      Arrays.stream(TableType.values()).map(Enum::name).collect(Collectors.joining("|"))));
+      tableTypeOption.setLongOpt("tableType");
+      tableTypeOption.setArgs(1);
+      tableTypeOption.setArgName("table type");
+      cmdLineOptions.addOption(tableTypeOption);
     }
     catch(Exception ex) {
       LOG.error("init()", ex);
@@ -219,7 +241,7 @@ public class PreUpgradeTool {
   /**
    * todo: change script comments to a preamble instead of a footer
    */
-  private void prepareAcidUpgradeInternal(String scriptLocation, boolean execute)
+  private void prepareAcidUpgradeInternal(RunOptions runOptions)
       throws HiveException, TException, IOException {
     HiveConf conf = hiveConf != null ? hiveConf : new HiveConf();
     boolean isAcidEnabled = isAcidEnabled(conf);
@@ -232,16 +254,22 @@ public class PreUpgradeTool {
     ValidTxnList txns = null;
     Hive db = null;
     try {
-      databases = hms.getAllDatabases();//TException
+      databases = hms.getDatabases(runOptions.getDbRegex()); //TException
       LOG.debug("Found " + databases.size() + " databases to process");
-      if (execute) {
+      if (runOptions.isExecute()) {
         db = Hive.get(conf);
       }
 
       for (String dbName : databases) {
         try {
-          List<String> tables = hms.getAllTables(dbName);
-          LOG.debug("found " + tables.size() + " tables in " + dbName);
+          List<String> tables;
+          if (runOptions.getTableType() == null) {
+            tables = hms.getTables(dbName, runOptions.getTableRegex());
+            LOG.debug("found {} tables in {}", tables.size(), dbName);
+          } else {
+            tables = hms.getTables(dbName, runOptions.getTableRegex(), runOptions.getTableType());
+            LOG.debug("found {} {} in {}", tables.size(), runOptions.getTableType().name(), dbName);
+          }
           for (String tableName : tables) {
             try {
               Table t = hms.getTable(dbName, tableName);
@@ -257,7 +285,7 @@ public class PreUpgradeTool {
                   txns = TxnUtils.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
                 }
                 List<String> compactionCommands =
-                  getCompactionCommands(t, conf, hms, compactionMetaInfo, execute, db, txns);
+                        getCompactionCommands(t, conf, hms, compactionMetaInfo, runOptions.isExecute(), db, txns);
                 compactions.addAll(compactionCommands);
               }
               /*todo: handle renaming files somewhere*/
@@ -293,9 +321,9 @@ public class PreUpgradeTool {
       throw new HiveException(exceptionMsg, e);
     }
 
-    makeCompactionScript(compactions, scriptLocation, compactionMetaInfo);
+    makeCompactionScript(compactions, runOptions.getOutputDir(), compactionMetaInfo);
 
-    if(execute) {
+    if(runOptions.isExecute()) {
       while(compactionMetaInfo.compactionIds.size() > 0) {
         LOG.debug("Will wait for " + compactionMetaInfo.compactionIds.size() +
             " compactions to complete");
@@ -336,7 +364,7 @@ public class PreUpgradeTool {
             }
             Thread.sleep(pollIntervalMs);
           } catch (InterruptedException ex) {
-            ;//this only responds to ^C
+            //this only responds to ^C
           }
         }
       }
diff --git a/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/RunOptions.java b/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/RunOptions.java
new file mode 100644
index 0000000..66213d4
--- /dev/null
+++ b/upgrade-acid/pre-upgrade/src/main/java/org/apache/hadoop/hive/upgrade/acid/RunOptions.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.upgrade.acid;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.hadoop.hive.metastore.TableType;
+
+/**
+ * This class's instance holds the option values that were passed by the user via the command line.
+ */
+public class RunOptions {
+
+  public static RunOptions fromCommandLine(CommandLine commandLine) {
+    String tableTypeText = commandLine.getOptionValue("tableType");
+    return new RunOptions(
+      commandLine.getOptionValue("location", "."),
+      commandLine.hasOption("execute"),
+      commandLine.getOptionValue("dbRegex", ".*"),
+      commandLine.getOptionValue("tableRegex", ".*"),
+      tableTypeText == null ? null : TableType.valueOf(tableTypeText)
+    );
+  }
+
+  private final String outputDir;
+  private final boolean execute;
+  private final String dbRegex;
+  private final String tableRegex;
+  private final TableType tableType;
+
+  public RunOptions(String outputDir, boolean execute, String dbRegex, String tableRegex, TableType tableType) {
+    this.outputDir = outputDir;
+    this.execute = execute;
+    this.dbRegex = dbRegex;
+    this.tableRegex = tableRegex;
+    this.tableType = tableType;
+  }
+
+  public String getOutputDir() {
+    return outputDir;
+  }
+
+  public boolean isExecute() {
+    return execute;
+  }
+
+  public String getDbRegex() {
+    return dbRegex;
+  }
+
+  public String getTableRegex() {
+    return tableRegex;
+  }
+
+  public TableType getTableType() {
+    return tableType;
+  }
+
+  @Override
+  public String toString() {
+    return "RunOptions{" +
+            "outputDir='" + outputDir + '\'' +
+            ", execute=" + execute +
+            ", dbRegex='" + dbRegex + '\'' +
+            ", tableRegex='" + tableRegex + '\'' +
+            ", tableType=" + tableType +
+            '}';
+  }
+}
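
A usage sketch for the new RunOptions class above, wired to a hand-built commons-cli command line (the option definitions here are abbreviated stand-ins for the ones registered in PreUpgradeTool.init(); the wrapper class is illustrative, not part of the commit):

    import org.apache.commons.cli.CommandLine;
    import org.apache.commons.cli.GnuParser;
    import org.apache.commons.cli.Options;
    import org.apache.hadoop.hive.upgrade.acid.RunOptions;

    public class RunOptionsSketch {
      public static void main(String[] args) throws Exception {
        // Illustrative option set; the authoritative definitions live in PreUpgradeTool.init().
        Options options = new Options();
        options.addOption("location", true, "Location to write scripts to");
        options.addOption("execute", false, "Execute the generated commands");
        options.addOption("dbRegex", true, "Database name filter");
        options.addOption("tableRegex", true, "Table name filter");
        options.addOption("tableType", true, "Table type filter");

        CommandLine line = new GnuParser().parse(options, new String[] {
            "-location", "/tmp/pre-upgrade", "-dbRegex", "sales.*", "-tableType", "MANAGED_TABLE"});

        RunOptions runOptions = RunOptions.fromCommandLine(line);
        // Unset options fall back to the defaults baked into fromCommandLine:
        // execute=false, tableRegex=".*".
        System.out.println(runOptions);
      }
    }
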
diff --git a/upgrade-acid/pre-upgrade/src/test/java/org/apache/hadoop/hive/upgrade/acid/TestPreUpgradeTool.java b/upgrade-acid/pre-upgrade/src/test/java/org/apache/hadoop/hive/upgrade/acid/TestPreUpgradeTool.java
index 90230d5d..e514e80 100644
--- a/upgrade-acid/pre-upgrade/src/test/java/org/apache/hadoop/hive/upgrade/acid/TestPreUpgradeTool.java
+++ b/upgrade-acid/pre-upgrade/src/test/java/org/apache/hadoop/hive/upgrade/acid/TestPreUpgradeTool.java
@@ -17,6 +17,25 @@
  */
 package org.apache.hadoop.hive.upgrade.acid;
 
+import static org.hamcrest.CoreMatchers.allOf;
+import static org.hamcrest.CoreMatchers.hasItem;
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.not;
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.StringContains.containsString;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.attribute.PosixFilePermission;
+import java.nio.file.attribute.PosixFilePermissions;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
@@ -30,12 +49,8 @@ import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.QueryState;
-import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
-import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
-import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.txn.compactor.Worker;
@@ -46,16 +61,6 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestName;
 
-import java.io.File;
-import java.nio.file.Files;
-import java.nio.file.attribute.FileAttribute;
-import java.nio.file.attribute.PosixFilePermission;
-import java.nio.file.attribute.PosixFilePermissions;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.atomic.AtomicBoolean;
-
 public class TestPreUpgradeTool {
   private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") +
       File.separator + TestPreUpgradeTool.class.getCanonicalName() + "-" + System.currentTimeMillis()
@@ -71,7 +76,7 @@ public class TestPreUpgradeTool {
    */
   @Test
   public void testUpgrade() throws Exception {
-    int[][] data = {{1,2}, {3, 4}, {5, 6}};
+    int[][] data = {{1, 2}, {3, 4}, {5, 6}};
     int[][] dataPart = {{1, 2, 10}, {3, 4, 11}, {5, 6, 12}};
     runStatementOnDriver("drop table if exists TAcid");
     runStatementOnDriver("drop table if exists TAcidPart");
@@ -117,13 +122,14 @@ public class TestPreUpgradeTool {
       PreUpgradeTool.pollIntervalMs = 1;
       PreUpgradeTool.hiveConf = hiveConf;
       PreUpgradeTool.main(args);
-    /*
-    todo: parse
-    target/tmp/org.apache.hadoop.hive.upgrade.acid.TestPreUpgradeTool-1527286256834/compacts_1527286277624.sql
-    make sure it's the only 'compacts' file and contains
-    ALTER TABLE default.tacid COMPACT 'major';
-ALTER TABLE default.tacidpart PARTITION(p=10Y) COMPACT 'major';
-    * */
+
+      String[] scriptFiles = getScriptFiles();
+      assertThat(scriptFiles.length, is(1));
+
+      List<String> scriptContent = loadScriptContent(new File(getTestDataDir(), scriptFiles[0]));
+      assertThat(scriptContent.size(), is(2));
+      assertThat(scriptContent, hasItem(is("ALTER TABLE default.tacid COMPACT 'major';")));
+      assertThat(scriptContent, hasItem(is("ALTER TABLE default.tacidpart PARTITION(p=10Y) COMPACT 'major';")));
 
       TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
 
@@ -133,11 +139,20 @@ ALTER TABLE default.tacidpart PARTITION(p=10Y) COMPACT 'major';
         Assert.assertEquals(e.toString(), TxnStore.CLEANING_RESPONSE, e.getState());
       }
 
-      String[] args2 = {"-location", getTestDataDir()};
+      // Check whether compaction was successful in the first run
+      File secondRunDataDir = new File(getTestDataDir(), "secondRun");
+      if (!secondRunDataDir.exists()) {
+        if (!secondRunDataDir.mkdir()) {
+          throw new IOException("Unable to create directory" + secondRunDataDir.getAbsolutePath());
+        }
+      }
+      String[] args2 = {"-location", secondRunDataDir.getAbsolutePath()};
       PreUpgradeTool.main(args2);
-      /*
-       * todo: parse compacts script - make sure there is nothing in it
-       * */
+
+      scriptFiles = secondRunDataDir.list();
+      assertThat(scriptFiles, is(not(nullValue())));
+      assertThat(scriptFiles.length, is(0));
+
     } finally {
       runStatementOnDriver("drop table if exists TAcid");
       runStatementOnDriver("drop table if exists TAcidPart");
@@ -146,9 +161,119 @@ ALTER TABLE default.tacidpart PARTITION(p=10Y) COMPACT 'major';
     }
   }
 
+  private static final String INCLUDE_DATABASE_NAME ="DInclude";
+  private static final String EXCLUDE_DATABASE_NAME ="DExclude";
+
+  @Test
+  public void testOnlyFilteredDatabasesAreUpgradedWhenRegexIsGiven() throws Exception {
+    int[][] data = {{1, 2}, {3, 4}, {5, 6}};
+    runStatementOnDriver("drop database if exists " + INCLUDE_DATABASE_NAME + " cascade");
+    runStatementOnDriver("drop database if exists " + EXCLUDE_DATABASE_NAME + " cascade");
+
+    try {
+      runStatementOnDriver("create database " + INCLUDE_DATABASE_NAME);
+      runStatementOnDriver("use " + INCLUDE_DATABASE_NAME);
+      runStatementOnDriver("create table " + INCLUDE_TABLE_NAME + " (a int, b int) clustered by (b) " +
+              "into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')");
+      runStatementOnDriver("insert into " + INCLUDE_TABLE_NAME + makeValuesClause(data));
+      runStatementOnDriver("update " + INCLUDE_TABLE_NAME + " set a = 1 where b = 2");
+
+      runStatementOnDriver("create database " + EXCLUDE_DATABASE_NAME);
+      runStatementOnDriver("use " + EXCLUDE_DATABASE_NAME);
+      runStatementOnDriver("create table " + EXCLUDE_DATABASE_NAME + " (a int, b int) clustered by (b) " +
+                "into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')");
+      runStatementOnDriver("insert into " + EXCLUDE_DATABASE_NAME + makeValuesClause(data));
+      runStatementOnDriver("update " + EXCLUDE_DATABASE_NAME + " set a = 1 where b = 2");
+
+      String[] args = {"-location", getTestDataDir(), "-dbRegex", "*include*"};
+      PreUpgradeTool.callback = new PreUpgradeTool.Callback() {
+        @Override
+        void onWaitForCompaction() throws MetaException {
+          runWorker(hiveConf);
+        }
+      };
+      PreUpgradeTool.pollIntervalMs = 1;
+      PreUpgradeTool.hiveConf = hiveConf;
+      PreUpgradeTool.main(args);
+
+      String[] scriptFiles = getScriptFiles();
+      assertThat(scriptFiles.length, is(1));
+
+      List<String> scriptContent = loadScriptContent(new File(getTestDataDir(), scriptFiles[0]));
+      assertThat(scriptContent.size(), is(1));
+      assertThat(scriptContent.get(0), is("ALTER TABLE dinclude.tinclude COMPACT 'major';"));
+
+    } finally {
+      runStatementOnDriver("drop database if exists " + INCLUDE_DATABASE_NAME + " cascade");
+      runStatementOnDriver("drop database if exists " + EXCLUDE_DATABASE_NAME + " cascade");
+    }
+  }
+
+  private static final String INCLUDE_TABLE_NAME ="TInclude";
+  private static final String EXCLUDE_TABLE_NAME ="TExclude";
+
+  @Test
+  public void testOnlyFilteredTablesAreUpgradedWhenRegexIsGiven() throws Exception {
+    int[][] data = {{1, 2}, {3, 4}, {5, 6}};
+    runStatementOnDriver("drop table if exists " + INCLUDE_TABLE_NAME);
+    runStatementOnDriver("drop table if exists " + EXCLUDE_TABLE_NAME);
+
+    try {
+      runStatementOnDriver("create table " + INCLUDE_TABLE_NAME + " (a int, b int) clustered by (b) " +
+              "into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')");
+      runStatementOnDriver("create table " + EXCLUDE_TABLE_NAME + " (a int, b int) clustered by (b) " +
+              "into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')");
+
+      runStatementOnDriver("insert into " + INCLUDE_TABLE_NAME + makeValuesClause(data));
+      runStatementOnDriver("update " + INCLUDE_TABLE_NAME + " set a = 1 where b = 2");
+
+      runStatementOnDriver("insert into " + EXCLUDE_TABLE_NAME + makeValuesClause(data));
+      runStatementOnDriver("update " + EXCLUDE_TABLE_NAME + " set a = 1 where b = 2");
+
+      String[] args = {"-location", getTestDataDir(), "-tableRegex", "*include*"};
+      PreUpgradeTool.callback = new PreUpgradeTool.Callback() {
+        @Override
+        void onWaitForCompaction() throws MetaException {
+          runWorker(hiveConf);
+        }
+      };
+      PreUpgradeTool.pollIntervalMs = 1;
+      PreUpgradeTool.hiveConf = hiveConf;
+      PreUpgradeTool.main(args);
+
+      String[] scriptFiles = getScriptFiles();
+      assertThat(scriptFiles.length, is(1));
+
+      List<String> scriptContent = loadScriptContent(new File(getTestDataDir(), scriptFiles[0]));
+      assertThat(scriptContent.size(), is(1));
+      assertThat(scriptContent.get(0), allOf(
+              containsString("ALTER TABLE"),
+              containsString(INCLUDE_TABLE_NAME.toLowerCase()),
+              containsString("COMPACT")));
+
+    } finally {
+      runStatementOnDriver("drop table if exists " + INCLUDE_TABLE_NAME);
+      runStatementOnDriver("drop table if exists " + EXCLUDE_TABLE_NAME);
+    }
+  }
+
+  private String[] getScriptFiles() {
+    File testDataDir = new File(getTestDataDir());
+    String[] scriptFiles = testDataDir.list((dir, name) -> name.startsWith("compacts_") && name.endsWith(".sql"));
+    assertThat(scriptFiles, is(not(nullValue())));
+    return scriptFiles;
+  }
+
+  private List<String> loadScriptContent(File file) throws IOException {
+    List<String> content = org.apache.commons.io.FileUtils.readLines(file);
+    content.removeIf(line -> line.startsWith("--"));
+    content.removeIf(StringUtils::isBlank);
+    return content;
+  }
+
   @Test
   public void testUpgradeExternalTableNoReadPermissionForDatabase() throws Exception {
-    int[][] data = {{1,2}, {3, 4}, {5, 6}};
+    int[][] data = {{1, 2}, {3, 4}, {5, 6}};
 
     runStatementOnDriver("drop database if exists test cascade");
     runStatementOnDriver("drop table if exists TExternal");
@@ -187,7 +312,7 @@ ALTER TABLE default.tacidpart PARTITION(p=10Y) COMPACT 'major';
 
   @Test
   public void testUpgradeExternalTableNoReadPermissionForTable() throws Exception {
-    int[][] data = {{1,2}, {3, 4}, {5, 6}};
+    int[][] data = {{1, 2}, {3, 4}, {5, 6}};
     runStatementOnDriver("drop table if exists TExternal");
 
     runStatementOnDriver("create table TExternal (a int, b int) stored as orc tblproperties('transactional'='false')");


[hive] 03/04: HIVE-21547: Temp Tables: Use stORC format for temporary tables (Krisztian Kasa via Gopal V)

Posted by kg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit cd3d71b34eb482e7abddd68b477ff05644b99d32
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Tue Jul 9 17:36:22 2019 +0200

    HIVE-21547: Temp Tables: Use stORC format for temporary tables (Krisztian Kasa via Gopal V)
    
    Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
 .../java/org/apache/hadoop/hive/ql/io/AcidOutputFormat.java   | 11 +++++++++++
 .../org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java     |  3 ++-
 .../org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java    |  4 ++--
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidOutputFormat.java
index e6b8490..1e8bb22 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidOutputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidOutputFormat.java
@@ -73,6 +73,8 @@ public interface AcidOutputFormat<K extends WritableComparable, V> extends HiveO
      * todo: link to AcidUtils?
      */
     private long visibilityTxnId = 0;
+    private boolean temporary = false;
+
     /**
      * Create the options object.
      * @param conf Use the given configuration
@@ -334,6 +336,15 @@ public interface AcidOutputFormat<K extends WritableComparable, V> extends HiveO
     public long getVisibilityTxnId() {
       return visibilityTxnId;
     }
+
+    public Options temporary(boolean temporary) {
+      this.temporary = temporary;
+      return this;
+    }
+
+    public boolean isTemporary() {
+      return temporary;
+    }
   }
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
index 2be795b..8980a62 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
@@ -347,7 +347,8 @@ public final class HiveFileFormatUtils {
         .inspector(inspector)
         .recordIdColumn(rowIdColNum)
         .statementId(conf.getStatementId())
-        .finalDestination(conf.getDestPath()));
+        .finalDestination(conf.getDestPath())
+        .temporary(conf.isTemporary()));
   }
 
   public static <T> T getFromPathRecursively(Map<Path, T> pathToPartitionInfo, Path dir,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 2e4ef31..3fa61d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -355,8 +355,8 @@ public class OrcRecordUpdater implements RecordUpdater {
       writerOptions.bufferSize(baseBufferSizeValue / ratio);
       writerOptions.stripeSize(baseStripeSizeValue / ratio);
       writerOptions.blockPadding(false);
-      if (optionsCloneForDelta.getConfiguration().getBoolean(
-        HiveConf.ConfVars.HIVE_ORC_DELTA_STREAMING_OPTIMIZATIONS_ENABLED.varname, false)) {
+      if (HiveConf.getBoolVar(optionsCloneForDelta.getConfiguration(),
+              HiveConf.ConfVars.HIVE_ORC_DELTA_STREAMING_OPTIMIZATIONS_ENABLED) || options.isTemporary()) {
         writerOptions.encodingStrategy(org.apache.orc.OrcFile.EncodingStrategy.SPEED);
         writerOptions.rowIndexStride(0);
         writerOptions.getConfiguration().set(OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute(), "-1.0");
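
Taken together, the three hunks in this commit thread the temporary-table flag from the FileSinkDesc through AcidOutputFormat.Options into the ORC delta writer. A minimal sketch of the resulting behaviour (the wrapper class is illustrative, not part of the commit; in Hive itself the flag is set by HiveFileFormatUtils as shown above):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.io.AcidOutputFormat;

    public class TempTableOrcOptionsSketch {
      public static void main(String[] args) {
        Configuration conf = new HiveConf();

        // New fluent setter added to AcidOutputFormat.Options in this commit.
        AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).temporary(true);

        // Mirrors the rewritten condition in OrcRecordUpdater: the faster delta writer
        // settings (SPEED encoding, no row index, no dictionary) now apply either when
        // the conf variable is on or when the target table is temporary.
        boolean useStreamingOptimizations = HiveConf.getBoolVar(conf,
            HiveConf.ConfVars.HIVE_ORC_DELTA_STREAMING_OPTIMIZATIONS_ENABLED)
            || options.isTemporary();
        System.out.println("use delta streaming optimizations: " + useStreamingOptimizations);
      }
    }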