Posted to commits@hive.apache.org by na...@apache.org on 2009/11/04 02:38:14 UTC
svn commit: r832645 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/io/
ql/src/test/org/apache/hadoop/hive/ql/io/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
serde/src/java/org/apache/hadoop/hive/serde2/ se...
Author: namit
Date: Wed Nov 4 01:38:12 2009
New Revision: 832645
URL: http://svn.apache.org/viewvc?rev=832645&view=rev
Log:
HIVE-910. NULL value is not correctly handled by ColumnarStruct
(He Yongqiang via namit)
Added:
hadoop/hive/trunk/ql/src/test/queries/clientpositive/rcfile_null_value.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_null_value.q.out
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java
hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
hadoop/hive/trunk/ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out
hadoop/hive/trunk/ql/src/test/results/clientpositive/input_columnarserde.q.out
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=832645&r1=832644&r2=832645&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Nov 4 01:38:12 2009
@@ -242,6 +242,9 @@
HIVE-907. NullPointerException in ErrorMsg.findSQLState
(Bill Graham via namit)
+ HIVE-910. NULL value is not correctly handled by ColumnarStruct
+ (He Yongqiang via namit)
+
Release 0.4.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java?rev=832645&r1=832644&r2=832645&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java Wed Nov 4 01:38:12 2009
@@ -187,76 +187,4 @@
}
return true;
}
-
-
- public static String READ_COLUMN_IDS_CONF_STR = "hive.io.file.readcolumn.ids";
-
- /**
- * Sets read columns' ids(start from zero) for RCFile's Reader. Once a column
- * is included in the list, RCFile's reader will not skip its value.
- *
- */
- public static void setReadColumnIDs(Configuration conf, ArrayList<Integer> ids) {
- String id = toReadColumnIDString(ids);
- setReadColumnIDConf(conf, id);
- }
-
- /**
- * Sets read columns' ids(start from zero) for RCFile's Reader. Once a column
- * is included in the list, RCFile's reader will not skip its value.
- *
- */
- public static void appendReadColumnIDs(Configuration conf, ArrayList<Integer> ids) {
- String id = toReadColumnIDString(ids);
- String old = conf.get(READ_COLUMN_IDS_CONF_STR, null);
- String newConfStr = id;
- if(old !=null )
- newConfStr = newConfStr + StringUtils.COMMA_STR + old;
-
- setReadColumnIDConf(conf, newConfStr);
- }
-
- private static void setReadColumnIDConf(Configuration conf, String id) {
- if (id == null || id.length() <= 0) {
- conf.set(READ_COLUMN_IDS_CONF_STR, "");
- return;
- }
-
- conf.set(READ_COLUMN_IDS_CONF_STR, id);
- }
-
- private static String toReadColumnIDString(ArrayList<Integer> ids) {
- String id = null;
- if (ids != null) {
- for (int i = 0; i < ids.size(); i++) {
- if (i == 0) {
- id = "" + ids.get(i);
- } else {
- id = id + StringUtils.COMMA_STR + ids.get(i);
- }
- }
- }
- return id;
- }
-
- /**
- * Returns an array of column ids(start from zero) which is set in the given
- * parameter <tt>conf</tt>.
- */
- public static ArrayList<Integer> getReadColumnIDs(Configuration conf) {
- String skips = conf.get(READ_COLUMN_IDS_CONF_STR, "");
- String[] list = StringUtils.split(skips);
- ArrayList<Integer> result = new ArrayList<Integer>(list.length);
- for (int i = 0; i < list.length; i++) {
- result.add(Integer.parseInt(list[i]));
- }
- return result;
- }
-
- /**
- * Clears the read column ids set in the conf, and will read all columns.
- */
- public static void setFullyReadColumns(Configuration conf) {
- conf.set(READ_COLUMN_IDS_CONF_STR, "");
- }
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=832645&r1=832644&r2=832645&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Wed Nov 4 01:38:12 2009
@@ -55,6 +55,8 @@
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+
/**
* HiveInputFormat is a parameterized InputFormat which looks at the path name and determine
* the correct InputFormat for that path name from mapredPlan.pathToPartitionInfo().
@@ -201,12 +203,14 @@
throw new IOException("cannot find class " + inputFormatClassName);
}
- initColumnsNeeded(job, inputFormatClass, hsplit.getPath().toString(),
+ // clone the JobConf so the needed read columns can be set per split without touching the shared conf
+ JobConf cloneJobConf = new JobConf(job);
+ initColumnsNeeded(cloneJobConf, inputFormatClass, hsplit.getPath().toString(),
hsplit.getPath().toUri().getPath());
- InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
+ InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, cloneJobConf);
return new HiveRecordReader(inputFormat.getRecordReader(inputSplit,
- job, reporter));
+ cloneJobConf, reporter));
}
private Map<String, partitionDesc> pathToPartitionInfo;
@@ -311,9 +315,9 @@
TableScanOperator tableScan = (TableScanOperator) op;
ArrayList<Integer> list = tableScan.getNeededColumnIDs();
if (list != null)
- HiveFileFormatUtils.appendReadColumnIDs(jobConf, list);
+ ColumnProjectionUtils.appendReadColumnIDs(jobConf, list);
else
- HiveFileFormatUtils.setFullyReadColumns(jobConf);
+ ColumnProjectionUtils.setFullyReadColumns(jobConf);
}
}
}
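
For context on the clone above: getInputFormatFromCache reuses InputFormat instances, so mutating the JobConf shared across splits would leak one split's read-column ids into another's. A minimal standalone sketch of the pattern, assuming hadoop-core and hive-serde on the classpath (class and variable names are illustrative, not part of the commit):

    import java.util.ArrayList;

    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
    import org.apache.hadoop.mapred.JobConf;

    public class CloneConfSketch {
      public static void main(String[] args) {
        JobConf shared = new JobConf();

        // Per-split copy: projection ids set here stay out of the shared conf.
        JobConf perSplit = new JobConf(shared);
        ArrayList<Integer> needed = new ArrayList<Integer>();
        needed.add(0);
        needed.add(2);
        ColumnProjectionUtils.appendReadColumnIDs(perSplit, needed);

        System.out.println(perSplit.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); // 0,2
        System.out.println(shared.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));   // null
      }
    }
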
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java?rev=832645&r1=832644&r2=832645&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java Wed Nov 4 01:38:12 2009
@@ -38,6 +38,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.CodecPool;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.hive.serde2.columnar.LazyDecompressionCallback;
@@ -922,7 +923,7 @@
columnNumber = Integer.parseInt(metadata.get(
new Text(COLUMN_NUMBER_METADATA_STR)).toString());
- java.util.ArrayList<Integer> notSkipIDs = HiveFileFormatUtils.getReadColumnIDs(conf);
+ java.util.ArrayList<Integer> notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(conf);
skippedColIDs = new boolean[columnNumber];
if (notSkipIDs.size() > 0) {
for (int i = 0; i < skippedColIDs.length; i++) {
@@ -1303,7 +1304,9 @@
if (!currentValue.inited) {
currentValueBuffer();
- ret.resetValid(columnNumber); // do this only when not intialized
+ // do this only when not initialized, but we may need to find a way to
+ // tell the caller how to initialize the valid size
+ ret.resetValid(columnNumber);
}
// we do not use BytesWritable here to avoid the byte-copy from
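
The reader change above pulls the projected ids from the conf and turns them into a per-column skip mask. A minimal plain-Java sketch of that translation; the lines outside the visible hunk are assumed, so treat this as the spirit of the logic rather than the committed body:

    import java.util.ArrayList;

    public class SkipMaskSketch {
      public static void main(String[] args) {
        int columnNumber = 8;
        ArrayList<Integer> notSkipIDs = new ArrayList<Integer>();
        notSkipIDs.add(2);
        notSkipIDs.add(3);

        boolean[] skippedColIDs = new boolean[columnNumber];
        if (notSkipIDs.size() > 0) {
          for (int i = 0; i < skippedColIDs.length; i++)
            skippedColIDs[i] = true;          // skip everything by default...
          for (int read : notSkipIDs)
            if (read < columnNumber)
              skippedColIDs[read] = false;    // ...then un-skip the projected columns
        }
        for (int i = 0; i < columnNumber; i++)
          System.out.println("column " + i + ": " + (skippedColIDs[i] ? "skipped" : "read"));
      }
    }
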
Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java?rev=832645&r1=832644&r2=832645&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/PerformTestRCFileAndSeqFile.java Wed Nov 4 01:38:12 2009
@@ -8,6 +8,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
@@ -276,7 +277,7 @@
java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
readCols.add(Integer.valueOf(0));
- HiveFileFormatUtils.setReadColumnIDs(conf, readCols);
+ ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
@@ -310,7 +311,7 @@
java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
readCols.add(Integer.valueOf(0));
readCols.add(Integer.valueOf(allColumnsNumber - 1));
- HiveFileFormatUtils.setReadColumnIDs(conf, readCols);
+ ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
@@ -344,7 +345,7 @@
int actualReadCount = 0;
- HiveFileFormatUtils.setFullyReadColumns(conf);
+ ColumnProjectionUtils.setFullyReadColumns(conf);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java?rev=832645&r1=832644&r2=832645&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java Wed Nov 4 01:38:12 2009
@@ -32,6 +32,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
@@ -96,7 +97,7 @@
bytesArray = new byte[][] { "123".getBytes("UTF-8"),
"456".getBytes("UTF-8"), "789".getBytes("UTF-8"),
"1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"),
- "hive and hadoop".getBytes("UTF-8"), new byte[0], new byte[0] };
+ "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
s = new BytesRefArrayWritable(bytesArray.length);
s.set(0, new BytesRefWritable("123".getBytes("UTF-8")));
s.set(1, new BytesRefWritable("456".getBytes("UTF-8")));
@@ -127,11 +128,11 @@
byte[][] record_1 = { "123".getBytes("UTF-8"), "456".getBytes("UTF-8"),
"789".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
"5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
- new byte[0], new byte[0] };
+ new byte[0], "NULL".getBytes("UTF-8") };
byte[][] record_2 = { "100".getBytes("UTF-8"), "200".getBytes("UTF-8"),
"123".getBytes("UTF-8"), "1000".getBytes("UTF-8"),
"5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
- new byte[0], new byte[0] };
+ new byte[0], "NULL".getBytes("UTF-8") };
RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
@@ -170,6 +171,7 @@
reader.next(rowID);
BytesRefArrayWritable cols = new BytesRefArrayWritable();
reader.getCurrentRow(cols);
+ cols.resetValid(8);
Object row = serDe.deserialize(cols);
StructObjectInspector oi = (StructObjectInspector) serDe
@@ -288,7 +290,7 @@
throws IOException, SerDeException {
LOG.debug("reading " + count + " records");
long start = System.currentTimeMillis();
- HiveFileFormatUtils.setFullyReadColumns(conf);
+ ColumnProjectionUtils.setFullyReadColumns(conf);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
@@ -296,6 +298,7 @@
BytesRefArrayWritable cols = new BytesRefArrayWritable();
while (reader.next(rowID)) {
reader.getCurrentRow(cols);
+ cols.resetValid(8);
Object row = serDe.deserialize(cols);
StructObjectInspector oi = (StructObjectInspector) serDe
@@ -329,7 +332,7 @@
java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
readCols.add(Integer.valueOf(2));
readCols.add(Integer.valueOf(3));
- HiveFileFormatUtils.setReadColumnIDs(conf, readCols);
+ ColumnProjectionUtils.setReadColumnIDs(conf, readCols);
RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
LongWritable rowID = new LongWritable();
@@ -337,6 +340,7 @@
while (reader.next(rowID)) {
reader.getCurrentRow(cols);
+ cols.resetValid(8);
Object row = serDe.deserialize(cols);
StructObjectInspector oi = (StructObjectInspector) serDe
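
The new cols.resetValid(8) calls are needed because a reused BytesRefArrayWritable keeps the valid size from its last fill; with column projection the reader may populate fewer slots, so the tests re-declare the table's full eight-column width before deserializing. A minimal sketch of that behavior (class name hypothetical):

    import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
    import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

    public class ResetValidSketch {
      public static void main(String[] args) throws Exception {
        BytesRefArrayWritable row = new BytesRefArrayWritable(8);
        row.set(0, new BytesRefWritable("123".getBytes("UTF-8")));
        System.out.println(row.size()); // 1: only one entry is valid so far

        // Re-declare how many leading entries a consumer (e.g. a SerDe
        // expecting all table columns) should treat as present.
        row.resetValid(8);
        System.out.println(row.size()); // 8
      }
    }
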
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/rcfile_null_value.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/rcfile_null_value.q?rev=832645&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/rcfile_null_value.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/rcfile_null_value.q Wed Nov 4 01:38:12 2009
@@ -0,0 +1,40 @@
+CREATE TABLE src1_rc(key STRING, value STRING) STORED AS RCFILE;
+INSERT OVERWRITE TABLE src1_rc SELECT * FROM src1;
+SELECT * FROM src1_rc;
+DROP TABLE src1_rc;
+
+CREATE TABLE dest1_rc(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS RCFILE;
+
+EXPLAIN
+FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20
+ ) a
+ RIGHT OUTER JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+INSERT OVERWRITE TABLE dest1_rc SELECT c.c1, c.c2, c.c3, c.c4;
+
+FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20
+ ) a
+ RIGHT OUTER JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+INSERT OVERWRITE TABLE dest1_rc SELECT c.c1, c.c2, c.c3, c.c4;
+
+SELECT dest1_rc.* FROM dest1_rc;
+
+DROP TABLE dest1_rc;
+
Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out?rev=832645&r1=832644&r2=832645&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out Wed Nov 4 01:38:12 2009
@@ -94,11 +94,11 @@
PREHOOK: query: SELECT columnarserde_create_shortcut.* FROM columnarserde_create_shortcut DISTRIBUTE BY 1
PREHOOK: type: QUERY
PREHOOK: Input: default@columnarserde_create_shortcut
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1010466676/10000
+PREHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_1/build/ql/tmp/1405876765/10000
POSTHOOK: query: SELECT columnarserde_create_shortcut.* FROM columnarserde_create_shortcut DISTRIBUTE BY 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@columnarserde_create_shortcut
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1010466676/10000
+POSTHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_1/build/ql/tmp/1405876765/10000
[0,0,0] ["0","0","0"] {"key_0":"value_0"} 1712634731 record_0
[1,2,3] ["10","100","1000"] {"key_1":"value_1"} 465985200 record_1
[2,4,6] ["20","200","2000"] {"key_2":"value_2"} -751827638 record_2
@@ -109,15 +109,15 @@
[7,14,21] ["70","700","7000"] {"key_7":"value_7"} -1461153973 record_7
[8,16,24] ["80","800","8000"] {"key_8":"value_8"} 1638581578 record_8
[9,18,27] ["90","900","9000"] {"key_9":"value_9"} 336964413 record_9
-null null {} 0 NULL
+null null null 0 NULL
PREHOOK: query: SELECT columnarserde_create_shortcut.a[0], columnarserde_create_shortcut.b[0], columnarserde_create_shortcut.c['key2'], columnarserde_create_shortcut.d, columnarserde_create_shortcut.e FROM columnarserde_create_shortcut DISTRIBUTE BY 1
PREHOOK: type: QUERY
PREHOOK: Input: default@columnarserde_create_shortcut
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/236647428/10000
+PREHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_1/build/ql/tmp/108106790/10000
POSTHOOK: query: SELECT columnarserde_create_shortcut.a[0], columnarserde_create_shortcut.b[0], columnarserde_create_shortcut.c['key2'], columnarserde_create_shortcut.d, columnarserde_create_shortcut.e FROM columnarserde_create_shortcut DISTRIBUTE BY 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@columnarserde_create_shortcut
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/236647428/10000
+POSTHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_1/build/ql/tmp/108106790/10000
0 0 NULL 1712634731 record_0
1 10 NULL 465985200 record_1
2 20 NULL -751827638 record_2
@@ -162,11 +162,11 @@
PREHOOK: query: SELECT columnShortcutTable.* FROM columnShortcutTable
PREHOOK: type: QUERY
PREHOOK: Input: default@columnshortcuttable
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/531162572/10000
+PREHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_1/build/ql/tmp/33712646/10000
POSTHOOK: query: SELECT columnShortcutTable.* FROM columnShortcutTable
POSTHOOK: type: QUERY
POSTHOOK: Input: default@columnshortcuttable
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/531162572/10000
+POSTHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_1/build/ql/tmp/33712646/10000
238 val_238
86 val_86
311 val_311
@@ -186,11 +186,11 @@
PREHOOK: query: SELECT columnShortcutTable.* FROM columnShortcutTable
PREHOOK: type: QUERY
PREHOOK: Input: default@columnshortcuttable
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1510560377/10000
+PREHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_1/build/ql/tmp/137255855/10000
POSTHOOK: query: SELECT columnShortcutTable.* FROM columnShortcutTable
POSTHOOK: type: QUERY
POSTHOOK: Input: default@columnshortcuttable
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1510560377/10000
+POSTHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_1/build/ql/tmp/137255855/10000
238 val_238 NULL
86 val_86 NULL
311 val_311 NULL
@@ -210,11 +210,11 @@
PREHOOK: query: SELECT columnShortcutTable.* FROM columnShortcutTable
PREHOOK: type: QUERY
PREHOOK: Input: default@columnshortcuttable
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1833240424/10000
+PREHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_1/build/ql/tmp/1910249486/10000
POSTHOOK: query: SELECT columnShortcutTable.* FROM columnShortcutTable
POSTHOOK: type: QUERY
POSTHOOK: Input: default@columnshortcuttable
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1833240424/10000
+POSTHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_1/build/ql/tmp/1910249486/10000
238
86
311
Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/input_columnarserde.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/input_columnarserde.q.out?rev=832645&r1=832644&r2=832645&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/input_columnarserde.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/input_columnarserde.q.out Wed Nov 4 01:38:12 2009
@@ -104,11 +104,11 @@
PREHOOK: query: SELECT input_columnarserde.* FROM input_columnarserde DISTRIBUTE BY 1
PREHOOK: type: QUERY
PREHOOK: Input: default@input_columnarserde
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/851213096/10000
+PREHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_0/build/ql/tmp/1940999347/10000
POSTHOOK: query: SELECT input_columnarserde.* FROM input_columnarserde DISTRIBUTE BY 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@input_columnarserde
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/851213096/10000
+POSTHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_0/build/ql/tmp/1940999347/10000
[0,0,0] ["0","0","0"] {"key_0":"value_0"} 1712634731 record_0
[1,2,3] ["10","100","1000"] {"key_1":"value_1"} 465985200 record_1
[2,4,6] ["20","200","2000"] {"key_2":"value_2"} -751827638 record_2
@@ -119,15 +119,15 @@
[7,14,21] ["70","700","7000"] {"key_7":"value_7"} -1461153973 record_7
[8,16,24] ["80","800","8000"] {"key_8":"value_8"} 1638581578 record_8
[9,18,27] ["90","900","9000"] {"key_9":"value_9"} 336964413 record_9
-null null {} 0 NULL
+null null null 0 NULL
PREHOOK: query: SELECT input_columnarserde.a[0], input_columnarserde.b[0], input_columnarserde.c['key2'], input_columnarserde.d, input_columnarserde.e FROM input_columnarserde DISTRIBUTE BY 1
PREHOOK: type: QUERY
PREHOOK: Input: default@input_columnarserde
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/722662209/10000
+PREHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_0/build/ql/tmp/534165813/10000
POSTHOOK: query: SELECT input_columnarserde.a[0], input_columnarserde.b[0], input_columnarserde.c['key2'], input_columnarserde.d, input_columnarserde.e FROM input_columnarserde DISTRIBUTE BY 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@input_columnarserde
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/722662209/10000
+POSTHOOK: Output: file:/data/users/heyongqiang/trunk/VENDOR.hive/trunk/.ptest_0/build/ql/tmp/534165813/10000
0 0 NULL 1712634731 record_0
1 10 NULL 465985200 record_1
2 20 NULL -751827638 record_2
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_null_value.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_null_value.q.out?rev=832645&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_null_value.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_null_value.q.out Wed Nov 4 01:38:12 2009
@@ -0,0 +1,276 @@
+PREHOOK: query: CREATE TABLE src1_rc(key STRING, value STRING) STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE src1_rc(key STRING, value STRING) STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@src1_rc
+PREHOOK: query: INSERT OVERWRITE TABLE src1_rc SELECT * FROM src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@src1_rc
+POSTHOOK: query: INSERT OVERWRITE TABLE src1_rc SELECT * FROM src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@src1_rc
+PREHOOK: query: SELECT * FROM src1_rc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1_rc
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/tmp/1289916622/10000
+POSTHOOK: query: SELECT * FROM src1_rc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1_rc
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/tmp/1289916622/10000
+238 val_238
+
+311 val_311
+ val_27
+ val_165
+ val_409
+255 val_255
+278 val_278
+98 val_98
+ val_484
+ val_265
+ val_193
+401 val_401
+150 val_150
+273 val_273
+224
+369
+66 val_66
+128
+213 val_213
+146 val_146
+406 val_406
+
+
+
+PREHOOK: query: DROP TABLE src1_rc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE src1_rc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@src1_rc
+PREHOOK: query: CREATE TABLE dest1_rc(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1_rc(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1_rc
+PREHOOK: query: EXPLAIN
+FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20
+ ) a
+ RIGHT OUTER JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+INSERT OVERWRITE TABLE dest1_rc SELECT c.c1, c.c2, c.c3, c.c4
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20
+ ) a
+ RIGHT OUTER JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+INSERT OVERWRITE TABLE dest1_rc SELECT c.c1, c.c2, c.c3, c.c4
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_RIGHTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) value) c2)) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src1) key) 10) (< (. (TOK_TABLE_OR_COL src1) key) 20))))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src src2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) key) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) value) c4)) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL src2) key) 15) (< (. (TOK_TABLE_OR_COL src2) key) 25))))) b) (= (. (TOK_TABLE_OR_COL a) c1) (. (TOK_TABLE_OR_COL b) c3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c1) c1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) c2) c2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) c3) c3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) c4) c4)))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest1_rc)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) c1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) c2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) c3)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) c4)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ c:a:src1
+ TableScan
+ alias: src1
+ Filter Operator
+ predicate:
+ expr: ((key > 10) and (key < 20))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 10) and (key < 20))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ c:b:src2
+ TableScan
+ alias: src2
+ Filter Operator
+ predicate:
+ expr: ((key > 15) and (key < 25))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 15) and (key < 25))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: string
+ expr: UDFToInteger(_col2)
+ type: int
+ expr: _col3
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: dest1_rc
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: dest1_rc
+
+
+PREHOOK: query: FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20
+ ) a
+ RIGHT OUTER JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+INSERT OVERWRITE TABLE dest1_rc SELECT c.c1, c.c2, c.c3, c.c4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1_rc
+POSTHOOK: query: FROM (
+ FROM
+ (
+ FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20
+ ) a
+ RIGHT OUTER JOIN
+ (
+ FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25
+ ) b
+ ON (a.c1 = b.c3)
+ SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4
+) c
+INSERT OVERWRITE TABLE dest1_rc SELECT c.c1, c.c2, c.c3, c.c4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1_rc
+PREHOOK: query: SELECT dest1_rc.* FROM dest1_rc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1_rc
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/tmp/1827783552/10000
+POSTHOOK: query: SELECT dest1_rc.* FROM dest1_rc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1_rc
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/tmp/1827783552/10000
+17 val_17 17 val_17
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+18 val_18 18 val_18
+19 val_19 19 val_19
+NULL NULL 20 val_20
+NULL NULL 24 val_24
+NULL NULL 24 val_24
+PREHOOK: query: DROP TABLE dest1_rc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE dest1_rc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@dest1_rc
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java?rev=832645&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java Wed Nov 4 01:38:12 2009
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
+
+public class ColumnProjectionUtils {
+
+ public static String READ_COLUMN_IDS_CONF_STR = "hive.io.file.readcolumn.ids";
+
+ /**
+ * Sets the ids (zero-based) of the columns to read for RCFile's Reader. Once
+ * a column is included in the list, RCFile's reader will not skip its value.
+ */
+ public static void setReadColumnIDs(Configuration conf, ArrayList<Integer> ids) {
+ String id = toReadColumnIDString(ids);
+ setReadColumnIDConf(conf, id);
+ }
+
+ /**
+ * Appends the ids (zero-based) of the columns to read to the ids already set
+ * in the conf. Once a column is included in the list, RCFile's reader will
+ * not skip its value.
+ */
+ public static void appendReadColumnIDs(Configuration conf,
+ ArrayList<Integer> ids) {
+ String id = toReadColumnIDString(ids);
+ if(id != null) {
+ String old = conf.get(READ_COLUMN_IDS_CONF_STR, null);
+ String newConfStr = id;
+ if (old != null)
+ newConfStr = newConfStr + StringUtils.COMMA_STR + old;
+
+ setReadColumnIDConf(conf, newConfStr);
+ }
+ }
+
+ private static void setReadColumnIDConf(Configuration conf, String id) {
+ if (id == null || id.length() <= 0) {
+ conf.set(READ_COLUMN_IDS_CONF_STR, "");
+ return;
+ }
+
+ conf.set(READ_COLUMN_IDS_CONF_STR, id);
+ }
+
+ private static String toReadColumnIDString(ArrayList<Integer> ids) {
+ String id = null;
+ if (ids != null) {
+ for (int i = 0; i < ids.size(); i++) {
+ if (i == 0) {
+ id = "" + ids.get(i);
+ } else {
+ id = id + StringUtils.COMMA_STR + ids.get(i);
+ }
+ }
+ }
+ return id;
+ }
+
+ /**
+ * Returns the list of column ids (zero-based) that is set in the given
+ * configuration <tt>conf</tt>, with duplicates removed.
+ */
+ public static ArrayList<Integer> getReadColumnIDs(Configuration conf) {
+ if( conf == null )
+ return new ArrayList<Integer>(0);
+ String skips = conf.get(READ_COLUMN_IDS_CONF_STR, "");
+ String[] list = StringUtils.split(skips);
+ ArrayList<Integer> result = new ArrayList<Integer>(list.length);
+ for (int i = 0; i < list.length; i++) {
+ // the conf value may contain duplicate ids; add each one only once
+ Integer toAdd = Integer.parseInt(list[i]);
+ if (!result.contains(toAdd))
+ result.add(toAdd);
+ }
+ return result;
+ }
+
+ /**
+ * Clears the read column ids set in the conf, so that all columns are read.
+ */
+ public static void setFullyReadColumns(Configuration conf) {
+ conf.set(READ_COLUMN_IDS_CONF_STR, "");
+ }
+
+}
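
A short round trip through the helpers above, runnable with hive-serde and hadoop-core on the classpath (class name illustrative). Note that appendReadColumnIDs prepends the new ids to whatever is already in the conf, and getReadColumnIDs drops duplicates, keeping first occurrences:

    import java.util.ArrayList;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

    public class ProjectionRoundTrip {
      public static void main(String[] args) {
        Configuration conf = new Configuration();

        ArrayList<Integer> first = new ArrayList<Integer>();
        first.add(1);
        first.add(3);
        ColumnProjectionUtils.setReadColumnIDs(conf, first);   // conf now "1,3"

        ArrayList<Integer> more = new ArrayList<Integer>();
        more.add(3);                                           // deliberate duplicate
        more.add(5);
        ColumnProjectionUtils.appendReadColumnIDs(conf, more); // conf now "3,5,1,3"

        // Duplicates are filtered on read, first occurrence wins: [3, 5, 1]
        System.out.println(ColumnProjectionUtils.getReadColumnIDs(conf));

        ColumnProjectionUtils.setFullyReadColumns(conf);       // "" means read all
        System.out.println(ColumnProjectionUtils.getReadColumnIDs(conf)); // []
      }
    }
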
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java?rev=832645&r1=832644&r2=832645&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java Wed Nov 4 01:38:12 2009
@@ -26,6 +26,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -91,7 +92,10 @@
.getSeparators(), serdeParams.getNullSequence(), serdeParams.isEscaped(),
serdeParams.getEscapeChar());
- cachedLazyStruct = new ColumnarStruct(cachedObjectInspector);
+
+ java.util.ArrayList<Integer> notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(job);
+
+ cachedLazyStruct = new ColumnarStruct(cachedObjectInspector, notSkipIDs);
int size = serdeParams.getColumnTypes().size();
field = new BytesRefWritable[size];
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java?rev=832645&r1=832644&r2=832645&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java Wed Nov 4 01:38:12 2009
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
@@ -49,7 +50,6 @@
private static final Log LOG = LogFactory.getLog(ColumnarStruct.class);
- boolean initialized = false; // init() function is called?
int[] prjColIDs = null; // list of projected column IDs
/**
@@ -60,19 +60,54 @@
* the ObjectInspector representing the type of this LazyStruct.
*/
public ColumnarStruct(ObjectInspector oi) {
+ this(oi, null);
+ }
+
+ /**
+ * Construct a ColumnarStruct object with the TypeInfo. It creates the first
+ * level object at the first place
+ *
+ * @param oi
+ * the ObjectInspector representing the type of this LazyStruct.
+ * @param notSkippedColumnIDs
+ * the column ids that should not be skipped
+ */
+ public ColumnarStruct(ObjectInspector oi, ArrayList<Integer> notSkippedColumnIDs) {
List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
int num = fieldRefs.size();
fields = new LazyObject[num];
cachedByteArrayRef = new ByteArrayRef[num];
rawBytesField = new BytesRefWritable[num];
- fieldIsNull = new boolean[num];
+ fieldSkipped = new boolean[num];
inited = new boolean[num];
+
+ // if no columns are specified, read all of them: allocate the list if the
+ // caller passed null, then fill 'notSkippedColumnIDs' with every column id
+ if (notSkippedColumnIDs == null)
+ notSkippedColumnIDs = new ArrayList<Integer>(num);
+ if (notSkippedColumnIDs.size() == 0) {
+ for (int i = 0; i < num; i++)
+ notSkippedColumnIDs.add(i);
+ }
+
for (int i = 0; i < num; i++) {
fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
cachedByteArrayRef[i] = new ByteArrayRef();
- fieldIsNull[i] = false;
- inited[i] = false;
+ if(!notSkippedColumnIDs.contains(i)){
+ fieldSkipped[i] = true;
+ inited[i] = true;
+ } else
+ inited[i] = false;
}
+
+ // maintain the list of projected column IDs that exist in this row's schema
+ int min = notSkippedColumnIDs.size() > num ? num : notSkippedColumnIDs
+ .size();
+ prjColIDs = new int[min];
+ for (int i = 0, index = 0; i < notSkippedColumnIDs.size(); ++i) {
+ int readCol = notSkippedColumnIDs.get(i).intValue();
+ if (readCol < num) {
+ prjColIDs[index] = readCol;
+ index++;
+ }
+ }
}
/**
@@ -104,7 +139,7 @@
ByteArrayRef[] cachedByteArrayRef = null;
BytesRefWritable[] rawBytesField = null;
boolean[] inited = null;
- boolean[] fieldIsNull = null;
+ boolean[] fieldSkipped = null;
/**
* Get the field out of the row without checking parsed. This is called by
@@ -117,7 +152,7 @@
* @return The value of the field
*/
protected Object uncheckedGetField(int fieldID, Text nullSequence) {
- if (fieldIsNull[fieldID])
+ if (fieldSkipped[fieldID])
return null;
if (!inited[fieldID]) {
BytesRefWritable passedInField = rawBytesField[fieldID];
@@ -132,9 +167,10 @@
}
byte[] data = cachedByteArrayRef[fieldID].getData();
- int fieldLen = data.length;
+ int fieldLen = rawBytesField[fieldID].length;
+
if (fieldLen == nullSequence.getLength()
- && LazyUtils.compare(data, 0,
+ && LazyUtils.compare(data, rawBytesField[fieldID].getStart(),
fieldLen, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) {
return null;
}
@@ -150,45 +186,15 @@
* =========================================================================
*/
public void init(BytesRefArrayWritable cols) {
- if (initialized) { // short cut for non-first calls
- for (int i = 0; i < prjColIDs.length; ++i ) {
- int fieldIndex = prjColIDs[i];
- rawBytesField[fieldIndex] = cols.unCheckedGet(fieldIndex);
- inited[fieldIndex] = false;
- }
- } else { // first time call init()
- int fieldIndex = 0;
- int min = cols.size() < fields.length ? cols.size() : fields.length;
-
- ArrayList<Integer> tmp_sel_cols = new ArrayList<Integer>();
-
- for (; fieldIndex < min; fieldIndex++) {
-
- // call the faster unCheckedGet()
- // alsert: min <= cols.size()
- BytesRefWritable passedInField = cols.unCheckedGet(fieldIndex);
-
- if (passedInField.length > 0) {
- // if (fields[fieldIndex] == null)
- // fields[fieldIndex] = LazyFactory.createLazyObject(fieldTypeInfos
- // .get(fieldIndex));
- tmp_sel_cols.add(fieldIndex);
- rawBytesField[fieldIndex] = passedInField;
- fieldIsNull[fieldIndex] = false;
- } else
- fieldIsNull[fieldIndex] = true;
-
- inited[fieldIndex] = false;
- }
- for (; fieldIndex < fields.length; fieldIndex++)
- fieldIsNull[fieldIndex] = true;
-
- // maintain a list of non-NULL column IDs
- prjColIDs = new int[tmp_sel_cols.size()];
- for (int i = 0; i < prjColIDs.length; ++i ) {
- prjColIDs[i] = tmp_sel_cols.get(i).intValue();
+ for (int i = 0; i < prjColIDs.length; ++i ) {
+ int fieldIndex = prjColIDs[i];
+ if(fieldIndex < cols.size()){
+ rawBytesField[fieldIndex] = cols.unCheckedGet(fieldIndex);
+ inited[fieldIndex] = false;
+ } else {
+ // the query selects a column that does not exist in the file; skip it
+ fieldSkipped[fieldIndex] = true;
}
- initialized = true;
}
}
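
The crux of the uncheckedGetField change above: a field's bytes are a slice of a larger shared buffer, so the null-sequence check must use the slice's start and length from rawBytesField[fieldID], not offset 0 and the whole array's length. A minimal plain-Java sketch of the difference; the helper mirrors the shape of LazyUtils.compare, and the names plus the default null sequence "\N" are assumptions for illustration:

    public class NullSequenceSketch {
      // Same shape as LazyUtils.compare: byte-wise equality of two slices.
      static boolean isNullSequence(byte[] buf, int start, int len, byte[] nullSeq) {
        if (len != nullSeq.length)
          return false;
        for (int i = 0; i < len; i++)
          if (buf[start + i] != nullSeq[i])
            return false;
        return true;
      }

      public static void main(String[] args) throws Exception {
        byte[] nullSeq = "\\N".getBytes("UTF-8");
        // One shared buffer holding two fields: "abc" at [0,3) and "\N" at [3,5).
        byte[] rowBuffer = "abc\\N".getBytes("UTF-8");
        int fieldStart = 3, fieldLen = 2;

        // Old logic: offset 0 and the buffer's full length never match "\N",
        // so a NULL field was handed to the lazy parser as real data.
        System.out.println(isNullSequence(rowBuffer, 0, rowBuffer.length, nullSeq)); // false

        // Fixed logic: compare exactly the field's slice.
        System.out.println(isNullSequence(rowBuffer, fieldStart, fieldLen, nullSeq)); // true
      }
    }
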