You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/04/09 08:13:46 UTC

svn commit: r763522 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/java/org/apache/hadoop/hive/ql/plan/

Author: namit
Date: Thu Apr  9 06:13:46 2009
New Revision: 763522

URL: http://svn.apache.org/viewvc?rev=763522&view=rev
Log:
HIVE-393. Remove unnecessary file-format checks in MoveTask: the check now runs only for moves created by LOAD statements, not for moves generated by queries.
(Zheng Shao via namit)


Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=763522&r1=763521&r2=763522&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Apr  9 06:13:46 2009
@@ -23,6 +23,9 @@
 
     HIVE-385. Split Driver's run into compile and execute.
     (Neil Conway via namit)
+  
+    HIVE-393. Remove unnecessary checks in movetask for file type.
+    (Zheng Shao via namit)
 
 Release 0.3.0 - Unreleased
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java?rev=763522&r1=763521&r2=763522&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java Thu Apr  9 06:13:46 2009
@@ -99,43 +99,44 @@
         String mesg_detail = " from " + tbd.getSourceDir();
         console.printInfo(mesg, mesg_detail);
 
-        // Get the file format of the table
-        boolean tableIsSequenceFile = tbd.getTable().getInputFileFormatClass().equals(SequenceFileInputFormat.class);
-        // Get all files from the src directory
-        FileStatus [] dirs;
-        ArrayList<FileStatus> files;
-        try {
-          fs = FileSystem.get(db.getTable(tbd.getTable().getTableName()).getDataLocation(),
-              Hive.get().getConf());
-          dirs = fs.globStatus(new Path(tbd.getSourceDir()));
-          files = new ArrayList<FileStatus>();
-          for (int i=0; (dirs != null && i<dirs.length); i++) {
-            files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath())));
-            // We only check one file, so exit the loop when we have at least one.
-            if (files.size()>0) break;
-          }
-        } catch (IOException e) {
-          throw new HiveException("addFiles: filesystem error in check phase", e);
-        }
-        // Check if the file format of the file matches that of the table.
-        if (files.size() > 0) {
-          int fileId = 0;
-          boolean fileIsSequenceFile = true;   
+        if (work.getCheckFileFormat()) {
+          // Get the file format of the table
+          boolean tableIsSequenceFile = tbd.getTable().getInputFileFormatClass().equals(SequenceFileInputFormat.class);
+          // Get all files from the src directory
+          FileStatus [] dirs;
+          ArrayList<FileStatus> files;
           try {
-            SequenceFile.Reader reader = new SequenceFile.Reader(
-              fs, files.get(fileId).getPath(), conf);
-            reader.close();
+            fs = FileSystem.get(db.getTable(tbd.getTable().getTableName()).getDataLocation(),
+                Hive.get().getConf());
+            dirs = fs.globStatus(new Path(tbd.getSourceDir()));
+            files = new ArrayList<FileStatus>();
+            for (int i=0; (dirs != null && i<dirs.length); i++) {
+              files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath())));
+              // We only check one file, so exit the loop when we have at least one.
+              if (files.size()>0) break;
+            }
           } catch (IOException e) {
-            fileIsSequenceFile = false;
+            throw new HiveException("addFiles: filesystem error in check phase", e);
           }
-          if (!fileIsSequenceFile && tableIsSequenceFile) {
-            throw new HiveException("Cannot load text files into a table stored as SequenceFile.");
+          // Check if the file format of the file matches that of the table.
+          if (files.size() > 0) {
+            int fileId = 0;
+            boolean fileIsSequenceFile = true;   
+            try {
+              SequenceFile.Reader reader = new SequenceFile.Reader(
+                fs, files.get(fileId).getPath(), conf);
+              reader.close();
+            } catch (IOException e) {
+              fileIsSequenceFile = false;
+            }
+            if (!fileIsSequenceFile && tableIsSequenceFile) {
+              throw new HiveException("Cannot load text files into a table stored as SequenceFile.");
+            }
+            if (fileIsSequenceFile && !tableIsSequenceFile) {
+              throw new HiveException("Cannot load SequenceFiles into a table stored as TextFile.");
+            }
           }
-          if (fileIsSequenceFile && !tableIsSequenceFile) {
-            throw new HiveException("Cannot load SequenceFiles into a table stored as TextFile.");
-          }
-        }
-         
+        }           
 
         if(tbd.getPartitionSpec().size() == 0) {
           db.loadTable(new Path(tbd.getSourceDir()), tbd.getTable().getTableName(), tbd.getReplace());

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java?rev=763522&r1=763521&r2=763522&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java Thu Apr  9 06:13:46 2009
@@ -213,9 +213,9 @@
                                         isOverWrite));
 
     if(rTask != null) {
-      rTask.addDependentTask(TaskFactory.get(new moveWork(loadTableWork, loadFileWork), this.conf));
+      rTask.addDependentTask(TaskFactory.get(new moveWork(loadTableWork, loadFileWork, true), this.conf));
     } else {
-      rTask = TaskFactory.get(new moveWork(loadTableWork, loadFileWork), this.conf);
+      rTask = TaskFactory.get(new moveWork(loadTableWork, loadFileWork, true), this.conf);
     }
 
     rootTasks.add(rTask);

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=763522&r1=763521&r2=763522&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Apr  9 06:13:46 2009
@@ -3437,7 +3437,7 @@
     else {
       // First we generate the move work as this needs to be made dependent on all
       // the tasks that have a file sink operation
-      mv = new moveWork(loadTableWork, loadFileWork);
+      mv = new moveWork(loadTableWork, loadFileWork, false);
       mvTask = TaskFactory.get(mv, this.conf);
     }
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java?rev=763522&r1=763521&r2=763522&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/moveWork.java Thu Apr  9 06:13:46 2009
@@ -27,12 +27,16 @@
   private List<loadTableDesc> loadTableWork;
   private List<loadFileDesc> loadFileWork;
 
+  private boolean checkFileFormat;
+
   public moveWork() { }
   public moveWork(
     final List<loadTableDesc> loadTableWork,
-    final List<loadFileDesc> loadFileWork) {
+    final List<loadFileDesc> loadFileWork,
+    boolean checkFileFormat) {
     this.loadTableWork = loadTableWork;
     this.loadFileWork = loadFileWork;
+    this.checkFileFormat = checkFileFormat;
   }
   @explain(displayName="tables")
   public List<loadTableDesc> getLoadTableWork() {
@@ -49,4 +53,12 @@
   public void setLoadFileWork(final List<loadFileDesc> loadFileWork) {
     this.loadFileWork=loadFileWork;
   }
+  
+  public boolean getCheckFileFormat() {
+    return checkFileFormat;
+  }
+  public void setCheckFileFormat(boolean checkFileFormat) {
+    this.checkFileFormat = checkFileFormat;
+  }
+  
 }