You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/09/04 04:54:51 UTC

[02/28] hive git commit: HIVE-11669: OrcFileDump service should support directories (Prasanth Jayachandran reviewed by Sergey Shelukhin)

HIVE-11669: OrcFileDump service should support directories (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8e712da0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8e712da0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8e712da0

Branch: refs/heads/llap
Commit: 8e712da0d8464173e0977b61661bbd00960b08d8
Parents: 2ef40ca
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Fri Aug 28 13:13:18 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Fri Aug 28 13:13:18 2015 -0500

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/io/orc/FileDump.java  | 48 ++++++++++++++++++--
 1 file changed, 44 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8e712da0/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
index 4acb810..76ecb33 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
@@ -22,7 +22,7 @@ import java.io.OutputStreamWriter;
 import java.io.PrintStream;
 import java.text.DecimalFormat;
 import java.util.ArrayList;
-import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
@@ -32,8 +32,10 @@ import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
@@ -47,6 +49,9 @@ import org.apache.hadoop.io.LongWritable;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONWriter;
 
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+
 /**
  * A tool for printing out the file structure of ORC files.
  */
@@ -86,23 +91,55 @@ public final class FileDump {
       System.err.println("Error : ORC files are not specified");
       return;
     }
+
+    // if the specified path is directory, iterate through all files and print the file dump
+    List<String> filesInPath = Lists.newArrayList();
+    for (String filename : files) {
+      Path path = new Path(filename);
+      filesInPath.addAll(getAllFilesInPath(path, conf));
+    }
+
     if (dumpData) {
-      printData(Arrays.asList(files), conf);
+      printData(filesInPath, conf);
     } else {
       if (jsonFormat) {
         boolean prettyPrint = cli.hasOption('p');
-        JsonFileDump.printJsonMetaData(Arrays.asList(files), conf, rowIndexCols, prettyPrint,
+        JsonFileDump.printJsonMetaData(filesInPath, conf, rowIndexCols, prettyPrint,
             printTimeZone);
       } else {
-        printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
+        printMetaData(filesInPath, conf, rowIndexCols, printTimeZone);
+      }
+    }
+  }
+
+  private static Collection<? extends String> getAllFilesInPath(final Path path,
+      final Configuration conf) throws IOException {
+    List<String> filesInPath = Lists.newArrayList();
+    FileSystem fs = path.getFileSystem(conf);
+    FileStatus fileStatus = fs.getFileStatus(path);
+    if (fileStatus.isDir()) {
+      FileStatus[] fileStatuses = fs.listStatus(path, AcidUtils.hiddenFileFilter);
+      for (FileStatus fileInPath : fileStatuses) {
+        if (fileInPath.isDir()) {
+          filesInPath.addAll(getAllFilesInPath(fileInPath.getPath(), conf));
+        } else {
+          filesInPath.add(fileInPath.getPath().toString());
+        }
       }
+    } else {
+      filesInPath.add(path.toString());
     }
+
+    return filesInPath;
   }
 
   private static void printData(List<String> files, Configuration conf) throws IOException,
       JSONException {
     for (String file : files) {
       printJsonData(conf, file);
+      if (files.size() > 1) {
+        System.out.println(Strings.repeat("=", 80) + "\n");
+      }
     }
   }
 
@@ -204,6 +241,9 @@ public final class FileDump {
       System.out.println("Padding length: " + paddedBytes + " bytes");
       System.out.println("Padding ratio: " + format.format(percentPadding) + "%");
       rows.close();
+      if (files.size() > 1) {
+        System.out.println(Strings.repeat("=", 80) + "\n");
+      }
     }
   }