You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/06/25 03:00:33 UTC
svn commit: r788232 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/io/
ql/src/test/queries/clientnegative/ ql/src/test/results/clientnegative/
Author: namit
Date: Thu Jun 25 01:00:32 2009
New Revision: 788232
URL: http://svn.apache.org/viewvc?rev=788232&view=rev
Log:
HIVE-472. HiveFileFormatUtils's checkInputFormat does not include RCFile.
(He Yongqiang via namit)
Added:
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java
hadoop/hive/trunk/ql/src/test/queries/clientnegative/load_wrong_fileformat_rc_seq.q
hadoop/hive/trunk/ql/src/test/queries/clientnegative/load_wrong_fileformat_txt_seq.q
hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat_rc_seq.q.out
hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat_txt_seq.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java
hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat.q.out
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=788232&r1=788231&r2=788232&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Jun 25 01:00:32 2009
@@ -267,6 +267,9 @@
HIVE-575. Fix Map join out-of-memory problem. (Namit Jain via zshao)
+ HIVE-472. HiveFileFormatUtils's checkInputFormat does not include RCFile.
+ (He Yongqiang via namit)
+
Release 0.3.1 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java?rev=788232&r1=788231&r2=788232&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java Thu Jun 25 01:00:32 2009
@@ -123,7 +123,9 @@
}
// Check if the file format of the file matches that of the table.
- HiveFileFormatUtils.checkInputFormat(fs, conf, tbd.getTable().getInputFileFormatClass(), files);
+ boolean flag = HiveFileFormatUtils.checkInputFormat(fs, conf, tbd.getTable().getInputFileFormatClass(), files);
+ if(!flag)
+ throw new HiveException("Wrong file format. Please check the file's format.");
}
if(tbd.getPartitionSpec().size() == 0) {
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java?rev=788232&r1=788231&r2=788232&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java Thu Jun 25 01:00:32 2009
@@ -22,6 +22,7 @@
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
+import java.util.Set;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -35,6 +36,7 @@
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
/**
* An util class for various Hive file format tasks.
@@ -43,9 +45,6 @@
* compatibility. They return the newly added HiveOutputFormat for the older
* ones.
*
- * }
- *
- *
*/
public class HiveFileFormatUtils {
@@ -59,7 +58,7 @@
@SuppressWarnings("unchecked")
private static Map<Class<? extends OutputFormat>, Class<? extends HiveOutputFormat>> outputFormatSubstituteMap;
-
+
/**
* register a substitute
*
@@ -104,35 +103,87 @@
}
return defaultFinalPath;
}
+
+ static {
+ inputFormatCheckerMap = new HashMap<Class<? extends InputFormat>, Class<? extends InputFormatChecker>>();
+ HiveFileFormatUtils.registerInputFormatChecker(SequenceFileInputFormat.class, SequenceFileInputFormatChecker.class);
+ HiveFileFormatUtils.registerInputFormatChecker(RCFileInputFormat.class, RCFileInputFormat.class);
+ inputFormatCheckerInstanceCache = new HashMap<Class<? extends InputFormatChecker>, InputFormatChecker>();
+ }
+
+ @SuppressWarnings("unchecked")
+ private static Map<Class<? extends InputFormat>, Class<? extends InputFormatChecker>> inputFormatCheckerMap;
+
+ private static Map<Class<? extends InputFormatChecker>, InputFormatChecker> inputFormatCheckerInstanceCache;
+
+ /**
+ * register an InputFormatChecker for a given InputFormat
+ *
+ * @param format
+ * the InputFormat class whose input files the checker validates
+ * @param checker the InputFormatChecker class to register for that format
+ */
+ @SuppressWarnings("unchecked")
+ public synchronized static void registerInputFormatChecker(
+ Class<? extends InputFormat> format,
+ Class<? extends InputFormatChecker> checker) {
+ inputFormatCheckerMap.put(format, checker);
+ }
+
+ /**
+ * get an InputFormatChecker for a file format.
+ */
+ public synchronized static Class<? extends InputFormatChecker> getInputFormatChecker(
+ Class<?> inputFormat) {
+ Class<? extends InputFormatChecker> result = inputFormatCheckerMap.get(inputFormat);
+ return result;
+ }
/**
* checks if files are in same format as the given input format
*/
+ @SuppressWarnings("unchecked")
public static boolean checkInputFormat(FileSystem fs, HiveConf conf,
Class<? extends InputFormat> inputFormatCls, ArrayList<FileStatus> files)
throws HiveException {
if (files.size() > 0) {
- boolean tableIsSequenceFile = inputFormatCls
- .equals(SequenceFileInputFormat.class);
- int fileId = 0;
- boolean fileIsSequenceFile = true;
- try {
- SequenceFile.Reader reader = new SequenceFile.Reader(fs, files.get(
- fileId).getPath(), conf);
- reader.close();
- } catch (IOException e) {
- fileIsSequenceFile = false;
- }
- if (!fileIsSequenceFile && tableIsSequenceFile) {
- throw new HiveException(
- "Cannot load text files into a table stored as SequenceFile.");
+ Class<? extends InputFormatChecker> checkerCls = getInputFormatChecker(inputFormatCls);
+ if(checkerCls==null && inputFormatCls.isAssignableFrom(TextInputFormat.class)) {
+ // we get a text input format here, we can not determine a file is text
+ // according to its content, so all we can do is test whether any other
+ // registered file format accepts it. If one of them does, the file is
+ // rejected; otherwise we treat it as a text file, although it may not be.
+ return checkTextInputFormat(fs, conf, files);
}
- if (fileIsSequenceFile && !tableIsSequenceFile) {
- throw new HiveException(
- "Cannot load SequenceFiles into a table stored as TextFile.");
+
+ if (checkerCls != null) {
+ InputFormatChecker checkerInstance = inputFormatCheckerInstanceCache
+ .get(checkerCls);
+ try {
+ if (checkerInstance == null) {
+ checkerInstance = checkerCls.newInstance();
+ inputFormatCheckerInstanceCache.put(checkerCls, checkerInstance);
+ }
+ return checkerInstance.validateInput(fs, conf, files);
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
}
return true;
}
return false;
}
+
+ @SuppressWarnings("unchecked")
+ private static boolean checkTextInputFormat(FileSystem fs, HiveConf conf,
+ ArrayList<FileStatus> files) throws HiveException {
+ Set<Class<? extends InputFormat>> inputFormatter = inputFormatCheckerMap
+ .keySet();
+ for (Class<? extends InputFormat> reg : inputFormatter) {
+ boolean result = checkInputFormat(fs, conf, reg, files);
+ if (result)
+ return false;
+ }
+ return true;
+ }
}
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java?rev=788232&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java Thu Jun 25 01:00:32 2009
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hive.conf.HiveConf;
+
+/**
+ * Check for validity of the input files.
+ */
+public interface InputFormatChecker {
+
+ /**
+ * Validates the given input files.
+ * @return true if the files pass validation, false otherwise
+ */
+ public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files) throws IOException;
+
+}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java?rev=788232&r1=788231&r2=788232&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java Thu Jun 25 01:00:32 2009
@@ -19,7 +19,11 @@
package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
+import java.util.ArrayList;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
@@ -31,7 +35,7 @@
import org.apache.hadoop.mapred.Reporter;
public class RCFileInputFormat<K extends LongWritable, V extends BytesRefArrayWritable>
- extends FileInputFormat<K, V> {
+ extends FileInputFormat<K, V> implements InputFormatChecker{
public RCFileInputFormat() {
setMinSplitSize(SequenceFile.SYNC_INTERVAL);
@@ -45,4 +49,20 @@
return new RCFileRecordReader(job, (FileSplit) split);
}
+
+ @Override
+ public boolean validateInput(FileSystem fs, HiveConf conf,
+ ArrayList<FileStatus> files) throws IOException {
+ if (files.size() <= 0)
+ return false;
+ for (int fileId = 0; fileId < files.size(); fileId++) {
+ try {
+ RCFile.Reader reader = new RCFile.Reader(fs, files.get(fileId).getPath(), conf);
+ reader.close();
+ } catch (IOException e) {
+ return false;
+ }
+ }
+ return true;
+ }
}
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java?rev=788232&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java Thu Jun 25 01:00:32 2009
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.io.SequenceFile;
+
+public class SequenceFileInputFormatChecker implements InputFormatChecker {
+
+ @Override
+ public boolean validateInput(FileSystem fs, HiveConf conf,
+ ArrayList<FileStatus> files) throws IOException {
+ if (files.size() <= 0)
+ return false;
+ for (int fileId = 0; fileId < files.size(); fileId++) {
+ try {
+ SequenceFile.Reader reader = new SequenceFile.Reader(fs, files.get(
+ fileId).getPath(), conf);
+ reader.close();
+ } catch (IOException e) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+}
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/load_wrong_fileformat_rc_seq.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/load_wrong_fileformat_rc_seq.q?rev=788232&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/load_wrong_fileformat_rc_seq.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/load_wrong_fileformat_rc_seq.q Thu Jun 25 01:00:32 2009
@@ -0,0 +1,6 @@
+-- test for loading into tables with the correct file format
+-- test for loading into partitions with the correct file format
+
+DROP TABLE T1;
+CREATE TABLE T1(name STRING) STORED AS RCFILE;
+LOAD DATA LOCAL INPATH '../data/files/kv1.seq' INTO TABLE T1;
\ No newline at end of file
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/load_wrong_fileformat_txt_seq.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/load_wrong_fileformat_txt_seq.q?rev=788232&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/load_wrong_fileformat_txt_seq.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/load_wrong_fileformat_txt_seq.q Thu Jun 25 01:00:32 2009
@@ -0,0 +1,6 @@
+-- test for loading into tables with the correct file format
+-- test for loading into partitions with the correct file format
+
+DROP TABLE T1;
+CREATE TABLE T1(name STRING) STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/kv1.seq' INTO TABLE T1;
\ No newline at end of file
Modified: hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat.q.out?rev=788232&r1=788231&r2=788232&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat.q.out Thu Jun 25 01:00:32 2009
@@ -4,5 +4,5 @@
DROP TABLE T1
query: CREATE TABLE T1(name STRING) STORED AS SEQUENCEFILE
query: LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE T1
-Failed with exception Cannot load text files into a table stored as SequenceFile.
+Failed with exception Wrong file format. Please check the file's format.
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask
Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat_rc_seq.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat_rc_seq.q.out?rev=788232&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat_rc_seq.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat_rc_seq.q.out Thu Jun 25 01:00:32 2009
@@ -0,0 +1,8 @@
+query: -- test for loading into tables with the correct file format
+-- test for loading into partitions with the correct file format
+
+DROP TABLE T1
+query: CREATE TABLE T1(name STRING) STORED AS RCFILE
+query: LOAD DATA LOCAL INPATH '../data/files/kv1.seq' INTO TABLE T1
+Failed with exception Wrong file format. Please check the file's format.
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask
Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat_txt_seq.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat_txt_seq.q.out?rev=788232&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat_txt_seq.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/load_wrong_fileformat_txt_seq.q.out Thu Jun 25 01:00:32 2009
@@ -0,0 +1,8 @@
+query: -- test for loading into tables with the correct file format
+-- test for loading into partitions with the correct file format
+
+DROP TABLE T1
+query: CREATE TABLE T1(name STRING) STORED AS TEXTFILE
+query: LOAD DATA LOCAL INPATH '../data/files/kv1.seq' INTO TABLE T1
+Failed with exception Wrong file format. Please check the file's format.
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask