Posted to commits@hive.apache.org by gu...@apache.org on 2014/09/02 21:57:07 UTC
svn commit: r1622108 [14/27] - in /hive/branches/tez: ./
accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/
beeline/src/java/org/apache/hive/beeline/
beeline/src/test/org/apache/hive/beeline/ bin/ bin/ext/ checkstyle/
common/src/java/...
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java Tue Sep 2 19:56:56 2014
@@ -82,7 +82,7 @@ public final class VectorExpressionWrite
this.objectInspector = objectInspector;
return this;
}
-
+
/**
* The base implementation must be overridden by the Long specialization
*/
@@ -90,7 +90,7 @@ public final class VectorExpressionWrite
public Object writeValue(long value) throws HiveException {
throw new HiveException("Internal error: should not reach here");
}
-
+
/**
* The base implementation must be overridden by the Long specialization
*/
@@ -112,7 +112,7 @@ public final class VectorExpressionWrite
public Object setValue(Object field, double value) throws HiveException {
throw new HiveException("Internal error: should not reach here");
}
-
+
/**
* The base implementation must be overridden by the Bytes specialization
*/
@@ -120,7 +120,7 @@ public final class VectorExpressionWrite
public Object writeValue(byte[] value, int start, int length) throws HiveException {
throw new HiveException("Internal error: should not reach here");
}
-
+
/**
* The base implementation must be overridden by the Bytes specialization
*/
@@ -171,7 +171,7 @@ public final class VectorExpressionWrite
"Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b",
row, lcv.noNulls, lcv.isRepeating, lcv.isNull[row], lcv.isNull[0]));
}
-
+
@Override
public Object setValue(Object field, ColumnVector column, int row) throws HiveException {
LongColumnVector lcv = (LongColumnVector) column;
@@ -192,7 +192,7 @@ public final class VectorExpressionWrite
String.format(
"Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b",
row, lcv.noNulls, lcv.isRepeating, lcv.isNull[row], lcv.isNull[0]));
- }
+ }
}
/**
@@ -221,7 +221,7 @@ public final class VectorExpressionWrite
"Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b",
row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0]));
}
-
+
@Override
public Object setValue(Object field, ColumnVector column, int row) throws HiveException {
DoubleColumnVector dcv = (DoubleColumnVector) column;
@@ -242,7 +242,7 @@ public final class VectorExpressionWrite
String.format(
"Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b",
row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0]));
- }
+ }
}
/**
@@ -292,7 +292,7 @@ public final class VectorExpressionWrite
String.format(
"Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b",
row, bcv.noNulls, bcv.isRepeating, bcv.isNull[row], bcv.isNull[0]));
- }
+ }
}
@@ -396,7 +396,7 @@ public final class VectorExpressionWrite
(SettableLongObjectInspector) fieldObjInspector);
case VOID:
return genVectorExpressionWritableVoid(
- (VoidObjectInspector) fieldObjInspector);
+ (VoidObjectInspector) fieldObjInspector);
case BINARY:
return genVectorExpressionWritableBinary(
(SettableBinaryObjectInspector) fieldObjInspector);
@@ -419,7 +419,7 @@ public final class VectorExpressionWrite
throw new IllegalArgumentException("Unknown primitive type: " +
((PrimitiveObjectInspector) fieldObjInspector).getPrimitiveCategory());
}
-
+
case STRUCT:
case UNION:
case MAP:
@@ -428,7 +428,7 @@ public final class VectorExpressionWrite
fieldObjInspector.getCategory());
default:
throw new IllegalArgumentException("Unknown type " +
- fieldObjInspector.getCategory());
+ fieldObjInspector.getCategory());
}
}
@@ -526,7 +526,7 @@ public final class VectorExpressionWrite
private Object obj;
private Timestamp ts;
- public VectorExpressionWriter init(SettableTimestampObjectInspector objInspector)
+ public VectorExpressionWriter init(SettableTimestampObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
ts = new Timestamp(0);
@@ -550,7 +550,7 @@ public final class VectorExpressionWrite
((SettableTimestampObjectInspector) this.objectInspector).set(field, ts);
return field;
}
-
+
@Override
public Object initValue(Object ignored) {
return ((SettableTimestampObjectInspector) this.objectInspector).create(new Timestamp(0));
@@ -563,15 +563,15 @@ public final class VectorExpressionWrite
return new VectorExpressionWriterBytes() {
private Object obj;
private Text text;
-
- public VectorExpressionWriter init(SettableHiveVarcharObjectInspector objInspector)
+
+ public VectorExpressionWriter init(SettableHiveVarcharObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
this.text = new Text();
this.obj = initValue(null);
return this;
}
-
+
@Override
public Object writeValue(byte[] value, int start, int length) throws HiveException {
text.set(value, start, length);
@@ -580,7 +580,7 @@ public final class VectorExpressionWrite
}
@Override
- public Object setValue(Object field, byte[] value, int start, int length)
+ public Object setValue(Object field, byte[] value, int start, int length)
throws HiveException {
if (null == field) {
field = initValue(null);
@@ -589,7 +589,7 @@ public final class VectorExpressionWrite
((SettableHiveVarcharObjectInspector) this.objectInspector).set(field, text.toString());
return field;
}
-
+
@Override
public Object initValue(Object ignored) {
return ((SettableHiveVarcharObjectInspector) this.objectInspector)
@@ -603,24 +603,24 @@ public final class VectorExpressionWrite
return new VectorExpressionWriterBytes() {
private Object obj;
private Text text;
-
- public VectorExpressionWriter init(SettableStringObjectInspector objInspector)
+
+ public VectorExpressionWriter init(SettableStringObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
this.text = new Text();
this.obj = initValue(null);
return this;
}
-
+
@Override
public Object writeValue(byte[] value, int start, int length) throws HiveException {
this.text.set(value, start, length);
((SettableStringObjectInspector) this.objectInspector).set(this.obj, this.text.toString());
return this.obj;
}
-
+
@Override
- public Object setValue(Object field, byte[] value, int start, int length)
+ public Object setValue(Object field, byte[] value, int start, int length)
throws HiveException {
if (null == field) {
field = initValue(null);
@@ -628,12 +628,12 @@ public final class VectorExpressionWrite
this.text.set(value, start, length);
((SettableStringObjectInspector) this.objectInspector).set(field, this.text.toString());
return field;
- }
-
+ }
+
@Override
public Object initValue(Object ignored) {
return ((SettableStringObjectInspector) this.objectInspector).create(StringUtils.EMPTY);
- }
+ }
}.init(fieldObjInspector);
}
@@ -642,22 +642,22 @@ public final class VectorExpressionWrite
return new VectorExpressionWriterBytes() {
private Object obj;
private byte[] bytes;
-
- public VectorExpressionWriter init(SettableBinaryObjectInspector objInspector)
+
+ public VectorExpressionWriter init(SettableBinaryObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
this.bytes = ArrayUtils.EMPTY_BYTE_ARRAY;
this.obj = initValue(null);
return this;
}
-
+
@Override
public Object writeValue(byte[] value, int start, int length) throws HiveException {
bytes = Arrays.copyOfRange(value, start, start + length);
((SettableBinaryObjectInspector) this.objectInspector).set(this.obj, bytes);
return this.obj;
}
-
+
@Override
public Object setValue(Object field, byte[] value, int start, int length) throws HiveException {
if (null == field) {
@@ -666,7 +666,7 @@ public final class VectorExpressionWrite
bytes = Arrays.copyOfRange(value, start, start + length);
((SettableBinaryObjectInspector) this.objectInspector).set(field, bytes);
return field;
- }
+ }
@Override
public Object initValue(Object ignored) {
@@ -680,20 +680,20 @@ public final class VectorExpressionWrite
SettableLongObjectInspector fieldObjInspector) throws HiveException {
return new VectorExpressionWriterLong() {
private Object obj;
-
- public VectorExpressionWriter init(SettableLongObjectInspector objInspector)
+
+ public VectorExpressionWriter init(SettableLongObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
this.obj = initValue(null);
return this;
}
-
+
@Override
public Object writeValue(long value) throws HiveException {
((SettableLongObjectInspector) this.objectInspector).set(this.obj, value);
return this.obj;
}
-
+
@Override
public Object setValue(Object field, long value) throws HiveException {
if (null == field) {
@@ -712,56 +712,55 @@ public final class VectorExpressionWrite
}
private static VectorExpressionWriter genVectorExpressionWritableVoid(
- VoidObjectInspector fieldObjInspector) throws HiveException {
- return new VectorExpressionWriterLong() {
- private Object obj;
-
- public VectorExpressionWriter init(VoidObjectInspector objInspector)
- throws HiveException {
- super.init(objInspector);
- this.obj = initValue(null);
- return this;
- }
-
- @Override
- public Object writeValue(long value) throws HiveException {
- return this.obj;
- }
-
- @Override
- public Object setValue(Object field, long value) throws HiveException {
- if (null == field) {
- field = initValue(null);
- }
- return field;
- }
-
- @Override
- public Object initValue(Object ignored) {
- return ((VoidObjectInspector) this.objectInspector).copyObject(null);
- }
- }.init(fieldObjInspector);
- }
-
-
+ VoidObjectInspector fieldObjInspector) throws HiveException {
+ return new VectorExpressionWriterLong() {
+ private Object obj;
+
+ public VectorExpressionWriter init(VoidObjectInspector objInspector) throws HiveException {
+ super.init(objInspector);
+ this.obj = initValue(null);
+ return this;
+ }
+
+ @Override
+ public Object writeValue(long value) throws HiveException {
+ return this.obj;
+ }
+
+ @Override
+ public Object setValue(Object field, long value) throws HiveException {
+ if (null == field) {
+ field = initValue(null);
+ }
+ return field;
+ }
+
+ @Override
+ public Object initValue(Object ignored) {
+ return ((VoidObjectInspector) this.objectInspector).copyObject(null);
+ }
+ }.init(fieldObjInspector);
+ }
+
+
private static VectorExpressionWriter genVectorExpressionWritableInt(
SettableIntObjectInspector fieldObjInspector) throws HiveException {
return new VectorExpressionWriterLong() {
private Object obj;
-
- public VectorExpressionWriter init(SettableIntObjectInspector objInspector)
+
+ public VectorExpressionWriter init(SettableIntObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
this.obj = initValue(null);
return this;
}
-
+
@Override
public Object writeValue(long value) throws HiveException {
((SettableIntObjectInspector) this.objectInspector).set(this.obj, (int) value);
return this.obj;
}
-
+
@Override
public Object setValue(Object field, long value) throws HiveException {
if (null == field) {
@@ -770,7 +769,7 @@ public final class VectorExpressionWrite
((SettableIntObjectInspector) this.objectInspector).set(field, (int) value);
return field;
}
-
+
@Override
public Object initValue(Object ignored) {
return ((SettableIntObjectInspector) this.objectInspector)
@@ -783,20 +782,20 @@ public final class VectorExpressionWrite
SettableShortObjectInspector fieldObjInspector) throws HiveException {
return new VectorExpressionWriterLong() {
private Object obj;
-
- public VectorExpressionWriter init(SettableShortObjectInspector objInspector)
+
+ public VectorExpressionWriter init(SettableShortObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
this.obj = initValue(null);
return this;
}
-
+
@Override
public Object writeValue(long value) throws HiveException {
((SettableShortObjectInspector) this.objectInspector).set(this.obj, (short) value);
return this.obj;
}
-
+
@Override
public Object setValue(Object field, long value) throws HiveException {
if (null == field) {
@@ -805,7 +804,7 @@ public final class VectorExpressionWrite
((SettableShortObjectInspector) this.objectInspector).set(field, (short) value);
return field;
}
-
+
@Override
public Object initValue(Object ignored) {
return ((SettableShortObjectInspector) this.objectInspector)
@@ -818,20 +817,20 @@ public final class VectorExpressionWrite
SettableByteObjectInspector fieldObjInspector) throws HiveException {
return new VectorExpressionWriterLong() {
private Object obj;
-
- public VectorExpressionWriter init(SettableByteObjectInspector objInspector)
+
+ public VectorExpressionWriter init(SettableByteObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
this.obj = initValue(null);
return this;
}
-
+
@Override
public Object writeValue(long value) throws HiveException {
((SettableByteObjectInspector) this.objectInspector).set(this.obj, (byte) value);
return this.obj;
}
-
+
@Override
public Object setValue(Object field, long value) throws HiveException {
if (null == field) {
@@ -840,7 +839,7 @@ public final class VectorExpressionWrite
((SettableByteObjectInspector) this.objectInspector).set(field, (byte) value);
return field;
}
-
+
@Override
public Object initValue(Object ignored) {
return ((SettableByteObjectInspector) this.objectInspector)
@@ -853,31 +852,31 @@ public final class VectorExpressionWrite
SettableBooleanObjectInspector fieldObjInspector) throws HiveException {
return new VectorExpressionWriterLong() {
private Object obj;
-
- public VectorExpressionWriter init(SettableBooleanObjectInspector objInspector)
+
+ public VectorExpressionWriter init(SettableBooleanObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
this.obj = initValue(null);
return this;
}
-
+
@Override
public Object writeValue(long value) throws HiveException {
- ((SettableBooleanObjectInspector) this.objectInspector).set(this.obj,
+ ((SettableBooleanObjectInspector) this.objectInspector).set(this.obj,
value == 0 ? false : true);
return this.obj;
}
-
+
@Override
public Object setValue(Object field, long value) throws HiveException {
if (null == field) {
field = initValue(null);
}
- ((SettableBooleanObjectInspector) this.objectInspector).set(field,
+ ((SettableBooleanObjectInspector) this.objectInspector).set(field,
value == 0 ? false : true);
return field;
}
-
+
@Override
public Object initValue(Object ignored) {
return ((SettableBooleanObjectInspector) this.objectInspector)
@@ -890,20 +889,20 @@ public final class VectorExpressionWrite
SettableDoubleObjectInspector fieldObjInspector) throws HiveException {
return new VectorExpressionWriterDouble() {
private Object obj;
-
- public VectorExpressionWriter init(SettableDoubleObjectInspector objInspector)
+
+ public VectorExpressionWriter init(SettableDoubleObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
this.obj = initValue(null);
return this;
}
-
+
@Override
public Object writeValue(double value) throws HiveException {
((SettableDoubleObjectInspector) this.objectInspector).set(this.obj, value);
return this.obj;
}
-
+
@Override
public Object setValue(Object field, double value) throws HiveException {
if (null == field) {
@@ -911,8 +910,8 @@ public final class VectorExpressionWrite
}
((SettableDoubleObjectInspector) this.objectInspector).set(field, value);
return field;
- }
-
+ }
+
@Override
public Object initValue(Object ignored) {
return ((SettableDoubleObjectInspector) this.objectInspector)
@@ -925,20 +924,20 @@ public final class VectorExpressionWrite
SettableFloatObjectInspector fieldObjInspector) throws HiveException {
return new VectorExpressionWriterDouble() {
private Object obj;
-
- public VectorExpressionWriter init(SettableFloatObjectInspector objInspector)
+
+ public VectorExpressionWriter init(SettableFloatObjectInspector objInspector)
throws HiveException {
super.init(objInspector);
this.obj = initValue(null);
return this;
}
-
+
@Override
public Object writeValue(double value) throws HiveException {
((SettableFloatObjectInspector) this.objectInspector).set(this.obj, (float) value);
return this.obj;
}
-
+
@Override
public Object setValue(Object field, double value) throws HiveException {
if (null == field) {
@@ -947,7 +946,7 @@ public final class VectorExpressionWrite
((SettableFloatObjectInspector) this.objectInspector).set(field, (float) value);
return field;
}
-
+
@Override
public Object initValue(Object ignored) {
return ((SettableFloatObjectInspector) this.objectInspector)
@@ -1027,25 +1026,25 @@ public final class VectorExpressionWrite
*/
public static VectorExpressionWriter[] getExpressionWriters(StructObjectInspector objInspector)
throws HiveException {
-
+
if (objInspector.isSettable()) {
return getSettableExpressionWriters((SettableStructObjectInspector) objInspector);
}
-
+
List<? extends StructField> allFieldRefs = objInspector.getAllStructFieldRefs();
-
+
VectorExpressionWriter[] expressionWriters = new VectorExpressionWriter[allFieldRefs.size()];
-
+
for(int i=0; i<expressionWriters.length; ++i) {
expressionWriters[i] = genVectorExpressionWritable(allFieldRefs.get(i).getFieldObjectInspector());
}
-
+
return expressionWriters;
}
public static VectorExpressionWriter[] getSettableExpressionWriters(
SettableStructObjectInspector objInspector) throws HiveException {
- List<? extends StructField> fieldsRef = objInspector.getAllStructFieldRefs();
+ List<? extends StructField> fieldsRef = objInspector.getAllStructFieldRefs();
VectorExpressionWriter[] writers = new VectorExpressionWriter[fieldsRef.size()];
for(int i=0; i<writers.length; ++i) {
StructField fieldRef = fieldsRef.get(i);
@@ -1054,19 +1053,19 @@ public final class VectorExpressionWrite
writers[i] = genVectorExpressionWritable(objInspector, fieldRef, baseWriter);
}
return writers;
-
+
}
-
+
/**
- * VectorExpressionWriterSetter helper for vector expression writers that use
+ * VectorExpressionWriterSetter helper for vector expression writers that use
* settable ObjectInspector fields to assign the values.
- * This is used by the OrcStruct serialization (eg. CREATE TABLE ... AS ...)
+ * This is used by the OrcStruct serialization (eg. CREATE TABLE ... AS ...)
*/
private static class VectorExpressionWriterSetter extends VectorExpressionWriterBase {
private SettableStructObjectInspector settableObjInspector;
private StructField fieldRef;
private VectorExpressionWriter baseWriter;
-
+
public VectorExpressionWriterSetter init(
SettableStructObjectInspector objInspector,
StructField fieldRef,
@@ -1087,15 +1086,15 @@ public final class VectorExpressionWrite
@Override
public Object setValue(Object row, ColumnVector column, int columnRow)
throws HiveException {
-
+
// NULLs are handled by each individual base writer setter
// We could handle NULLs centrally here but that would result in spurious allocs
-
+
Object fieldValue = this.settableObjInspector.getStructFieldData(row, fieldRef);
fieldValue = baseWriter.setValue(fieldValue, column, columnRow);
return this.settableObjInspector.setStructFieldData(row, fieldRef, fieldValue);
}
-
+
@Override
public Object initValue(Object struct) throws HiveException {
Object initValue = this.baseWriter.initValue(null);
@@ -1103,7 +1102,7 @@ public final class VectorExpressionWrite
return struct;
}
}
-
+
private static VectorExpressionWriter genVectorExpressionWritable(
SettableStructObjectInspector objInspector,
StructField fieldRef,
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/Entity.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/Entity.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/Entity.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/Entity.java Tue Sep 2 19:56:56 2014
@@ -22,6 +22,7 @@ import java.io.Serializable;
import java.net.URI;
import java.util.Map;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -276,11 +277,13 @@ public class Entity implements Serializa
}
if (typ == Type.TABLE) {
- return t.getDataLocation().toUri();
+ Path path = t.getDataLocation();
+ return path == null ? null : path.toUri();
}
if (typ == Type.PARTITION) {
- return p.getDataLocation().toUri();
+ Path path = p.getDataLocation();
+ return path == null ? null : path.toUri();
}
if (typ == Type.DFS_DIR || typ == Type.LOCAL_DIR) {
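[Annotation] The Entity.java hunk above makes getLocation() null-safe: a table or partition whose data location is unset (e.g. a view) previously hit a NullPointerException in the unconditional toUri() call; now null propagates to the caller. A minimal sketch of the same pattern, with LocationHolder as a hypothetical stand-in for Table/Partition (assumes hadoop-common on the classpath):

    import java.net.URI;
    import org.apache.hadoop.fs.Path;

    // Hypothetical stand-in for Table/Partition, whose getDataLocation()
    // may return null for metadata-only entities.
    final class LocationHolder {
      private final Path dataLocation; // may be null

      LocationHolder(Path dataLocation) {
        this.dataLocation = dataLocation;
      }

      URI toUriOrNull() {
        // Before the fix, toUri() was called unconditionally and threw a
        // NullPointerException when the location was unset.
        return dataLocation == null ? null : dataLocation.toUri();
      }
    }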
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java Tue Sep 2 19:56:56 2014
@@ -19,7 +19,9 @@
package org.apache.hadoop.hive.ql.hooks;
import java.io.Serializable;
+import java.util.ArrayList;
import java.util.HashSet;
+import java.util.List;
import java.util.Set;
import org.apache.hadoop.fs.Path;
@@ -49,7 +51,8 @@ public class ReadEntity extends Entity i
// For views, the entities can be nested - by default, entities are at the top level
private final Set<ReadEntity> parents = new HashSet<ReadEntity>();
-
+ // The accessed columns of query
+ private final List<String> accessedColumns = new ArrayList<String>();
/**
* For serialization only.
@@ -159,4 +162,8 @@ public class ReadEntity extends Entity i
public void noLockNeeded() {
needsLock = false;
}
+
+ public List<String> getAccessedColumns() {
+ return accessedColumns;
+ }
}
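[Annotation] The ReadEntity.java hunk adds a per-entity list of the columns a query actually read, exposed through getAccessedColumns(). An illustrative consumer (ColumnAccessLogger is not part of this patch; only getAccessedColumns() is):

    import java.util.List;
    import org.apache.hadoop.hive.ql.hooks.ReadEntity;

    // Sketch of how a post-execution hook might audit column-level reads
    // using the accessor added by this patch.
    final class ColumnAccessLogger {
      void log(ReadEntity entity) {
        List<String> cols = entity.getAccessedColumns();
        System.out.println("read " + entity + " columns=" + cols);
      }
    }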
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/UpdateInputAccessTimeHook.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/UpdateInputAccessTimeHook.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/UpdateInputAccessTimeHook.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/UpdateInputAccessTimeHook.java Tue Sep 2 19:56:56 2014
@@ -66,7 +66,7 @@ public class UpdateInputAccessTimeHook {
case TABLE: {
Table t = db.getTable(re.getTable().getTableName());
t.setLastAccessTime(lastAccessTime);
- db.alterTable(t.getTableName(), t);
+ db.alterTable(t.getDbName() + "." + t.getTableName(), t);
break;
}
case PARTITION: {
@@ -76,7 +76,7 @@ public class UpdateInputAccessTimeHook {
p.setLastAccessTime(lastAccessTime);
db.alterPartition(t.getTableName(), p);
t.setLastAccessTime(lastAccessTime);
- db.alterTable(t.getTableName(), t);
+ db.alterTable(t.getDbName() + "." + t.getTableName(), t);
break;
}
default:
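[Annotation] Both alterTable calls above now pass a db-qualified name, so the hook resolves the table in its owning database rather than the session's current one; the same change appears in IndexMetadataChangeTask below. A sketch of the construction, assuming Hive's ql.metadata.Table API (getDbName()/getTableName()):

    import org.apache.hadoop.hive.ql.metadata.Table;

    // "default.my_table" instead of the bare "my_table".
    final class QualifiedNames {
      static String qualify(Table t) {
        return t.getDbName() + "." + t.getTableName();
      }
    }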
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexMetadataChangeTask.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexMetadataChangeTask.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexMetadataChangeTask.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexMetadataChangeTask.java Tue Sep 2 19:56:56 2014
@@ -77,7 +77,7 @@ public class IndexMetadataChangeTask ext
FileSystem fs = url.getFileSystem(conf);
FileStatus fstat = fs.getFileStatus(url);
tbl.getParameters().put(HiveIndex.INDEX_TABLE_CREATETIME, Long.toString(fstat.getModificationTime()));
- db.alterTable(tbl.getTableName(), tbl);
+ db.alterTable(tbl.getDbName() + "." + tbl.getTableName(), tbl);
}
} catch (Exception e) {
e.printStackTrace();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java Tue Sep 2 19:56:56 2014
@@ -233,7 +233,8 @@ public class BitmapIndexHandler extends
StringBuilder command= new StringBuilder();
LinkedHashMap<String, String> partSpec = indexTblPartDesc.getPartSpec();
- command.append("INSERT OVERWRITE TABLE " + HiveUtils.unparseIdentifier(indexTableName ));
+ command.append("INSERT OVERWRITE TABLE " +
+ HiveUtils.unparseIdentifier(dbName) + "." + HiveUtils.unparseIdentifier(indexTableName ));
if (partitioned && indexTblPartDesc != null) {
command.append(" PARTITION ( ");
List<String> ret = getPartKVPairStringArray(partSpec);
@@ -257,7 +258,8 @@ public class BitmapIndexHandler extends
command.append("EWAH_BITMAP(");
command.append(VirtualColumn.ROWOFFSET.getName());
command.append(")");
- command.append(" FROM " + HiveUtils.unparseIdentifier(baseTableName) );
+ command.append(" FROM " +
+ HiveUtils.unparseIdentifier(dbName) + "." + HiveUtils.unparseIdentifier(baseTableName));
LinkedHashMap<String, String> basePartSpec = baseTablePartDesc.getPartSpec();
if(basePartSpec != null) {
command.append(" WHERE ");
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java Tue Sep 2 19:56:56 2014
@@ -103,7 +103,8 @@ public class CompactIndexHandler extends
StringBuilder command= new StringBuilder();
LinkedHashMap<String, String> partSpec = indexTblPartDesc.getPartSpec();
- command.append("INSERT OVERWRITE TABLE " + HiveUtils.unparseIdentifier(indexTableName ));
+ command.append("INSERT OVERWRITE TABLE " +
+ HiveUtils.unparseIdentifier(dbName) + "." + HiveUtils.unparseIdentifier(indexTableName ));
if (partitioned && indexTblPartDesc != null) {
command.append(" PARTITION ( ");
List<String> ret = getPartKVPairStringArray(partSpec);
@@ -126,7 +127,8 @@ public class CompactIndexHandler extends
command.append(" collect_set (");
command.append(VirtualColumn.BLOCKOFFSET.getName());
command.append(") ");
- command.append(" FROM " + HiveUtils.unparseIdentifier(baseTableName) );
+ command.append(" FROM " +
+ HiveUtils.unparseIdentifier(dbName) + "." + HiveUtils.unparseIdentifier(baseTableName));
LinkedHashMap<String, String> basePartSpec = baseTablePartDesc.getPartSpec();
if(basePartSpec != null) {
command.append(" WHERE ");
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.java Tue Sep 2 19:56:56 2014
@@ -78,7 +78,7 @@ public class HiveIgnoreKeyTextOutputForm
final int finalRowSeparator = rowSeparator;
FileSystem fs = outPath.getFileSystem(jc);
final OutputStream outStream = Utilities.createCompressedStream(jc,
- fs.create(outPath, progress), isCompressed);
+ fs.create(outPath, progress), isCompressed);
return new RecordWriter() {
@Override
public void write(Writable r) throws IOException {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveNullValueSequenceFileOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveNullValueSequenceFileOutputFormat.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveNullValueSequenceFileOutputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveNullValueSequenceFileOutputFormat.java Tue Sep 2 19:56:56 2014
@@ -54,7 +54,7 @@ public class HiveNullValueSequenceFileOu
FileSystem fs = finalOutPath.getFileSystem(jc);
final SequenceFile.Writer outStream = Utilities.createSequenceWriter(jc, fs, finalOutPath,
- HiveKey.class, NullWritable.class, isCompressed, progress);
+ HiveKey.class, NullWritable.class, isCompressed, progress);
keyWritable = new HiveKey();
keyIsText = valueClass.equals(Text.class);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveSequenceFileOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveSequenceFileOutputFormat.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveSequenceFileOutputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveSequenceFileOutputFormat.java Tue Sep 2 19:56:56 2014
@@ -62,7 +62,7 @@ public class HiveSequenceFileOutputForma
FileSystem fs = finalOutPath.getFileSystem(jc);
final SequenceFile.Writer outStream = Utilities.createSequenceWriter(jc, fs, finalOutPath,
- BytesWritable.class, valueClass, isCompressed, progress);
+ BytesWritable.class, valueClass, isCompressed, progress);
return new RecordWriter() {
@Override
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileOutputFormat.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileOutputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileOutputFormat.java Tue Sep 2 19:56:56 2014
@@ -133,7 +133,7 @@ public class RCFileOutputFormat extends
RCFileOutputFormat.setColumnNumber(jc, cols.length);
final RCFile.Writer outWriter = Utilities.createRCFileWriter(jc,
- finalOutPath.getFileSystem(jc), finalOutPath, isCompressed, progress);
+ finalOutPath.getFileSystem(jc), finalOutPath, isCompressed, progress);
return new org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter() {
@Override
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java Tue Sep 2 19:56:56 2014
@@ -64,7 +64,7 @@ public class RCFileRecordReader<K extend
private final Map<String, RCFileSyncEntry> cache;
public RCFileSyncCache() {
- cache = Collections.synchronizedMap(new WeakHashMap<String, RCFileSyncEntry>());
+ cache = Collections.synchronizedMap(new WeakHashMap<String, RCFileSyncEntry>());
}
public void put(FileSplit split, long endSync) {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeMapper.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeMapper.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeMapper.java Tue Sep 2 19:56:56 2014
@@ -18,8 +18,6 @@
package org.apache.hadoop.hive.ql.io.merge;
-import java.io.IOException;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
@@ -31,6 +29,10 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
public class MergeMapper extends MapReduceBase {
protected JobConf jc;
protected Path finalPath;
@@ -48,6 +50,7 @@ public class MergeMapper extends MapRedu
protected Path tmpPath;
protected Path taskTmpPath;
protected Path dpPath;
+ protected Set<Path> incompatFileSet;
public final static Log LOG = LogFactory.getLog("MergeMapper");
@@ -62,6 +65,7 @@ public class MergeMapper extends MapRedu
HiveConf.ConfVars.HIVEMERGECURRENTJOBCONCATENATELISTBUCKETINGDEPTH);
Path specPath = MergeOutputFormat.getMergeOutputPath(job);
+ incompatFileSet = new HashSet<Path>();
Path tmpPath = Utilities.toTempPath(specPath);
Path taskTmpPath = Utilities.toTaskTempPath(specPath);
updatePaths(tmpPath, taskTmpPath);
@@ -176,6 +180,23 @@ public class MergeMapper extends MapRedu
if (!fs.rename(outPath, finalPath)) {
throw new IOException("Unable to rename output to " + finalPath);
}
+
+ // move any incompatible files to final path
+ if (!incompatFileSet.isEmpty()) {
+ for (Path incompatFile : incompatFileSet) {
+ String fileName = incompatFile.getName();
+ Path destFile = new Path(finalPath.getParent(), fileName);
+ try {
+ Utilities.renameOrMoveFiles(fs, incompatFile, destFile);
+ LOG.info("Moved incompatible file " + incompatFile + " to "
+ + destFile);
+ } catch (HiveException e) {
+ LOG.error("Unable to move " + incompatFile + " to " + destFile);
+ throw new IOException(e);
+ }
+ }
+ }
+
} else {
if (!autoDelete) {
fs.delete(outPath, true);
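[Annotation] MergeMapper now tolerates inputs that cannot be merged: they are collected into incompatFileSet during mapping and, at close time, moved verbatim next to the merged output instead of failing the task. The collect-then-move pattern in isolation, sketched with java.nio rather than the Hadoop FileSystem/Utilities.renameOrMoveFiles calls the patch itself uses:

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.util.HashSet;
    import java.util.Set;

    // Files that cannot be merged are remembered and later moved,
    // unchanged, into the output directory.
    final class IncompatibleFileMover {
      private final Set<Path> incompatFileSet = new HashSet<>();

      void markIncompatible(Path p) {
        incompatFileSet.add(p);
      }

      void moveToFinal(Path finalDir) throws IOException {
        for (Path src : incompatFileSet) {
          Files.move(src, finalDir.resolve(src.getFileName()));
        }
      }
    }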
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeTask.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeTask.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeTask.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeTask.java Tue Sep 2 19:56:56 2014
@@ -373,6 +373,10 @@ public class MergeTask extends Task<Merg
}
}
+ if (format == null || format.trim().equals("")) {
+ printUsage();
+ }
+
MergeWork mergeWork = null;
if (format.equals("rcfile")) {
mergeWork = new MergeWork(inputPaths, new Path(outputDir), RCFileInputFormat.class);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Tue Sep 2 19:56:56 2014
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
+import java.sql.Timestamp;
+
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -716,6 +718,99 @@ class ColumnStatisticsImpl implements Co
}
}
+ private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
+ implements TimestampColumnStatistics {
+ private Long minimum = null;
+ private Long maximum = null;
+
+ TimestampStatisticsImpl() {
+ }
+
+ TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) {
+ super(stats);
+ OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
+ // min,max values serialized/deserialized as int (milliseconds since epoch)
+ if (timestampStats.hasMaximum()) {
+ maximum = timestampStats.getMaximum();
+ }
+ if (timestampStats.hasMinimum()) {
+ minimum = timestampStats.getMinimum();
+ }
+ }
+
+ @Override
+ void reset() {
+ super.reset();
+ minimum = null;
+ maximum = null;
+ }
+
+ @Override
+ void updateTimestamp(Timestamp value) {
+ if (minimum == null) {
+ minimum = value.getTime();
+ maximum = value.getTime();
+ } else if (minimum > value.getTime()) {
+ minimum = value.getTime();
+ } else if (maximum < value.getTime()) {
+ maximum = value.getTime();
+ }
+ }
+
+ @Override
+ void merge(ColumnStatisticsImpl other) {
+ super.merge(other);
+ TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
+ if (minimum == null) {
+ minimum = timestampStats.minimum;
+ maximum = timestampStats.maximum;
+ } else if (timestampStats.minimum != null) {
+ if (minimum > timestampStats.minimum) {
+ minimum = timestampStats.minimum;
+ } else if (maximum < timestampStats.maximum) {
+ maximum = timestampStats.maximum;
+ }
+ }
+ }
+
+ @Override
+ OrcProto.ColumnStatistics.Builder serialize() {
+ OrcProto.ColumnStatistics.Builder result = super.serialize();
+ OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
+ .newBuilder();
+ if (getNumberOfValues() != 0) {
+ timestampStats.setMinimum(minimum);
+ timestampStats.setMaximum(maximum);
+ }
+ result.setTimestampStatistics(timestampStats);
+ return result;
+ }
+
+ @Override
+ public Timestamp getMinimum() {
+ Timestamp minTimestamp = new Timestamp(minimum);
+ return minTimestamp;
+ }
+
+ @Override
+ public Timestamp getMaximum() {
+ Timestamp maxTimestamp = new Timestamp(maximum);
+ return maxTimestamp;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder(super.toString());
+ if (getNumberOfValues() != 0) {
+ buf.append(" min: ");
+ buf.append(minimum);
+ buf.append(" max: ");
+ buf.append(maximum);
+ }
+ return buf.toString();
+ }
+ }
+
private long count = 0;
ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
@@ -759,6 +854,10 @@ class ColumnStatisticsImpl implements Co
throw new UnsupportedOperationException("Can't update date");
}
+ void updateTimestamp(Timestamp value) {
+ throw new UnsupportedOperationException("Can't update timestamp");
+ }
+
void merge(ColumnStatisticsImpl stats) {
count += stats.count;
}
@@ -806,6 +905,8 @@ class ColumnStatisticsImpl implements Co
return new DecimalStatisticsImpl();
case DATE:
return new DateStatisticsImpl();
+ case TIMESTAMP:
+ return new TimestampStatisticsImpl();
case BINARY:
return new BinaryStatisticsImpl();
default:
@@ -829,6 +930,8 @@ class ColumnStatisticsImpl implements Co
return new DecimalStatisticsImpl(stats);
} else if (stats.hasDateStatistics()) {
return new DateStatisticsImpl(stats);
+ } else if (stats.hasTimestampStatistics()) {
+ return new TimestampStatisticsImpl(stats);
} else if(stats.hasBinaryStatistics()) {
return new BinaryStatisticsImpl(stats);
} else {
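[Annotation] TimestampStatisticsImpl above tracks min/max as boxed epoch milliseconds (Long, null until the first value) and materializes java.sql.Timestamp objects on read. A round-trip sketch of that representation; unlike the patch, this sketch also null-guards the empty-statistics case:

    import java.sql.Timestamp;

    final class TimestampMinMax {
      private Long minimum; // null until the first value arrives
      private Long maximum;

      void update(Timestamp value) {
        long millis = value.getTime();
        if (minimum == null) {
          minimum = millis;
          maximum = millis;
        } else if (millis < minimum) {
          minimum = millis;
        } else if (millis > maximum) {
          maximum = millis;
        }
      }

      Timestamp getMinimum() { return minimum == null ? null : new Timestamp(minimum); }
      Timestamp getMaximum() { return maximum == null ? null : new Timestamp(maximum); }
    }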
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Tue Sep 2 19:56:56 2014
@@ -120,14 +120,14 @@ public final class FileDump {
RowIndex[] indices = rows.readRowIndex(stripeIx);
for (int col : rowIndexCols) {
StringBuilder buf = new StringBuilder();
- buf.append(" Column ").append(col).append(": row index");
+ buf.append(" Row group index column ").append(col).append(":");
RowIndex index = null;
if ((col >= indices.length) || ((index = indices[col]) == null)) {
buf.append(" not found\n");
continue;
}
for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
- buf.append(" RG ").append(entryIx).append(": ");
+ buf.append("\n Entry ").append(entryIx).append(":");
RowIndexEntry entry = index.getEntry(entryIx);
if (entry == null) {
buf.append("unknown\n");
@@ -139,15 +139,17 @@ public final class FileDump {
} else {
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
Object min = RecordReaderImpl.getMin(cs), max = RecordReaderImpl.getMax(cs);
- buf.append("[").append(min).append(", ").append(max).append(") at ");
+ buf.append(" count: ").append(cs.getNumberOfValues());
+ buf.append(" min: ").append(min);
+ buf.append(" max: ").append(max);
}
+ buf.append(" positions: ");
for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
if (posIx != 0) {
buf.append(",");
}
buf.append(entry.getPositions(posIx));
}
- buf.append("\n");
}
System.out.println(buf);
}
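[Annotation] Pieced together from the StringBuilder appends above, a dumped row-group index now reads roughly like this (column number, counts, min/max, and positions are illustrative; exact indentation comes from the string literals in the source):

    Row group index column 3:
      Entry 0: count: 10000 min: 1 max: 9999 positions: 0,0,0
      Entry 1: count: 10000 min: 10000 max: 19999 positions: 0,1520,336

The old format printed each entry as "RG n: [min, max) at positions"; the new one labels every field and folds all entries under a per-column header.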
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileMergeMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileMergeMapper.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileMergeMapper.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileMergeMapper.java Tue Sep 2 19:56:56 2014
@@ -18,20 +18,24 @@
package org.apache.hadoop.hive.ql.io.orc;
-import java.io.IOException;
-import java.util.List;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.merge.MergeMapper;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.shims.CombineHiveKey;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
/**
* Map task fast merging of ORC files.
*/
@@ -96,31 +100,9 @@ public class OrcFileMergeMapper extends
.inspector(reader.getObjectInspector()));
}
- // check compatibility with subsequent files
- if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) {
- throw new IOException("ORCFileMerge failed because the input files are not compatible."
- + " Column counts does not match.");
- }
-
- if (!k.compression.equals(compression)) {
- throw new IOException("ORCFileMerge failed because the input files are not compatible."
- + " Compression codec does not match.");
- }
-
- if (k.compressBufferSize != compressBuffSize) {
- throw new IOException("ORCFileMerge failed because the input files are not compatible."
- + " Compression buffer size does not match.");
-
- }
-
- if (!k.versionList.equals(version)) {
- throw new IOException("ORCFileMerge failed because the input files are not compatible."
- + " Version does not match.");
- }
-
- if (k.rowIndexStride != rowIndexStride) {
- throw new IOException("ORCFileMerge failed because the input files are not compatible."
- + " Row index stride does not match.");
+ if (!checkCompatibility(k, value)) {
+ incompatFileSet.add(k.getInputPath());
+ return;
}
// next file in the path
@@ -153,6 +135,43 @@ public class OrcFileMergeMapper extends
}
}
+ private boolean checkCompatibility(OrcFileKeyWrapper k,
+ OrcFileValueWrapper value) {
+ // check compatibility with subsequent files
+ if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) {
+ LOG.info("Incompatible ORC file merge! Column counts does not match for "
+ + k.getInputPath());
+ return false;
+ }
+
+ if (!k.compression.equals(compression)) {
+ LOG.info("Incompatible ORC file merge! Compression codec does not match" +
+ " for " + k.getInputPath());
+ return false;
+ }
+
+ if (k.compressBufferSize != compressBuffSize) {
+ LOG.info("Incompatible ORC file merge! Compression buffer size does not" +
+ " match for " + k.getInputPath());
+ return false;
+
+ }
+
+ if (!k.versionList.equals(version)) {
+ LOG.info("Incompatible ORC file merge! Version does not match for "
+ + k.getInputPath());
+ return false;
+ }
+
+ if (k.rowIndexStride != rowIndexStride) {
+ LOG.info("Incompatible ORC file merge! Row index stride does not match" +
+ " for " + k.getInputPath());
+ return false;
+ }
+
+ return true;
+ }
+
@Override
public void close() throws IOException {
// close writer
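[Annotation] The five former throw sites are folded into a boolean checkCompatibility(), so a mismatching input is logged, recorded, and skipped instead of aborting the merge. The control flow in miniature (types and signatures here are illustrative stand-ins for OrcFileKeyWrapper and the new private method):

    import java.util.HashSet;
    import java.util.Set;

    final class SkipIncompatible {
      private final Set<String> incompatFileSet = new HashSet<>();
      private final String expectedCompression = "ZLIB"; // illustrative

      // Log-and-return-false replaces the former throw-on-mismatch.
      private boolean checkCompatibility(String path, String compression) {
        if (!expectedCompression.equals(compression)) {
          System.out.println("Incompatible ORC file merge! Compression codec"
              + " does not match for " + path);
          return false;
        }
        return true;
      }

      void map(String path, String compression) {
        if (!checkCompatibility(path, compression)) {
          incompatFileSet.add(path); // set aside; moved verbatim at close time
          return;
        }
        // ... merge the file's stripes into the output writer ...
      }
    }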
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Tue Sep 2 19:56:56 2014
@@ -910,31 +910,31 @@ public class OrcInputFormat implements
static List<OrcSplit> generateSplitsInfo(Configuration conf)
throws IOException {
- // use threads to resolve directories into splits
- Context context = new Context(conf);
- for(Path dir: getInputPaths(conf)) {
- FileSystem fs = dir.getFileSystem(conf);
- context.schedule(new FileGenerator(context, fs, dir));
- }
- context.waitForTasks();
- // deal with exceptions
- if (!context.errors.isEmpty()) {
- List<IOException> errors =
- new ArrayList<IOException>(context.errors.size());
- for(Throwable th: context.errors) {
- if (th instanceof IOException) {
- errors.add((IOException) th);
- } else {
- throw new RuntimeException("serious problem", th);
- }
- }
- throw new InvalidInputException(errors);
- }
+ // use threads to resolve directories into splits
+ Context context = new Context(conf);
+ for(Path dir: getInputPaths(conf)) {
+ FileSystem fs = dir.getFileSystem(conf);
+ context.schedule(new FileGenerator(context, fs, dir));
+ }
+ context.waitForTasks();
+ // deal with exceptions
+ if (!context.errors.isEmpty()) {
+ List<IOException> errors =
+ new ArrayList<IOException>(context.errors.size());
+ for(Throwable th: context.errors) {
+ if (th instanceof IOException) {
+ errors.add((IOException) th);
+ } else {
+ throw new RuntimeException("serious problem", th);
+ }
+ }
+ throw new InvalidInputException(errors);
+ }
if (context.cacheStripeDetails) {
LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/"
+ context.numFilesCounter.get());
}
- return context.splits;
+ return context.splits;
}
@Override
@@ -998,14 +998,14 @@ public class OrcInputFormat implements
((FileSplit) inputSplit).getPath(),
OrcFile.readerOptions(conf)), conf, (FileSplit) inputSplit);
}
-
+
OrcSplit split = (OrcSplit) inputSplit;
reporter.setStatus(inputSplit.toString());
Options options = new Options(conf).reporter(reporter);
final RowReader<OrcStruct> inner = getReader(inputSplit, options);
-
-
+
+
/*Even though there are no delta files, we still need to produce row ids so that an
* UPDATE or DELETE statement would work on a table which didn't have any previous updates*/
if (split.isOriginal() && split.getDeltas().isEmpty()) {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java Tue Sep 2 19:56:56 2014
@@ -88,6 +88,9 @@ public class OrcRecordUpdater implements
private final IntWritable bucket = new IntWritable();
private final LongWritable rowId = new LongWritable();
private long insertedRows = 0;
+ // This records how many rows have been inserted or deleted. It is separate from insertedRows
+ // because that is monotonically increasing to give new unique row ids.
+ private long rowCountDelta = 0;
private final KeyIndexBuilder indexBuilder = new KeyIndexBuilder();
static class AcidStats {
@@ -263,6 +266,7 @@ public class OrcRecordUpdater implements
}
addEvent(INSERT_OPERATION, currentTransaction, currentTransaction,
insertedRows++, row);
+ rowCountDelta++;
}
@Override
@@ -283,6 +287,7 @@ public class OrcRecordUpdater implements
}
addEvent(DELETE_OPERATION, currentTransaction, originalTransaction, rowId,
null);
+ rowCountDelta--;
}
@Override
@@ -317,7 +322,11 @@ public class OrcRecordUpdater implements
@Override
public SerDeStats getStats() {
- return null;
+ SerDeStats stats = new SerDeStats();
+ stats.setRowCount(rowCountDelta);
+ // Don't worry about setting raw data size diff. I have no idea how to calculate that
+ // without finding the row we are updating or deleting, which would be a mess.
+ return stats;
}
@VisibleForTesting
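[Annotation] As the new comment in the hunk explains, insertedRows must keep growing to mint unique row ids, so a second counter nets inserts against deletes for the SerDeStats row count. The two counters in isolation:

    final class RowCounters {
      private long insertedRows = 0;  // monotonically increasing; source of row ids
      private long rowCountDelta = 0; // signed net change reported via SerDeStats

      long nextInsertRowId() {
        rowCountDelta++;
        return insertedRows++;
      }

      void recordDelete() {
        rowCountDelta--;
      }

      long statsRowCount() {
        return rowCountDelta;
      }
    }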
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Sep 2 19:56:56 2014
@@ -2251,6 +2251,8 @@ class RecordReaderImpl implements Record
return ((DateColumnStatistics) index).getMaximum();
} else if (index instanceof DecimalColumnStatistics) {
return ((DecimalColumnStatistics) index).getMaximum();
+ } else if (index instanceof TimestampColumnStatistics) {
+ return ((TimestampColumnStatistics) index).getMaximum();
} else {
return null;
}
@@ -2273,6 +2275,8 @@ class RecordReaderImpl implements Record
return ((DateColumnStatistics) index).getMinimum();
} else if (index instanceof DecimalColumnStatistics) {
return ((DecimalColumnStatistics) index).getMinimum();
+ } else if (index instanceof TimestampColumnStatistics) {
+ return ((TimestampColumnStatistics) index).getMinimum();
} else {
return null;
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java Tue Sep 2 19:56:56 2014
@@ -142,9 +142,9 @@ class RunLengthIntegerWriterV2 implement
private final boolean signed;
private EncodingType encoding;
private int numLiterals;
- private long[] zigzagLiterals;
- private long[] baseRedLiterals;
- private long[] adjDeltas;
+ private final long[] zigzagLiterals = new long[MAX_SCOPE];
+ private final long[] baseRedLiterals = new long[MAX_SCOPE];
+ private final long[] adjDeltas = new long[MAX_SCOPE];
private long fixedDelta;
private int zzBits90p;
private int zzBits100p;
@@ -252,8 +252,11 @@ class RunLengthIntegerWriterV2 implement
// store the first value as delta value using zigzag encoding
utils.writeVslong(output, adjDeltas[0]);
- // adjacent delta values are bit packed
- utils.writeInts(adjDeltas, 1, adjDeltas.length - 1, fb, output);
+ // adjacent delta values are bit packed. The length of adjDeltas array is
+ // always one less than the number of literals (delta difference for n
+ // elements is n-1). We have already written one element, write the
+ // remaining numLiterals - 2 elements here
+ utils.writeInts(adjDeltas, 1, numLiterals - 2, fb, output);
}
}
@@ -323,7 +326,7 @@ class RunLengthIntegerWriterV2 implement
// base reduced literals are bit packed
int closestFixedBits = utils.getClosestFixedBits(fb);
- utils.writeInts(baseRedLiterals, 0, baseRedLiterals.length, closestFixedBits,
+ utils.writeInts(baseRedLiterals, 0, numLiterals, closestFixedBits,
output);
// write patch list
@@ -372,7 +375,7 @@ class RunLengthIntegerWriterV2 implement
output.write(headerSecondByte);
// bit packing the zigzag encoded literals
- utils.writeInts(zigzagLiterals, 0, zigzagLiterals.length, fb, output);
+ utils.writeInts(zigzagLiterals, 0, numLiterals, fb, output);
// reset run length
variableRunLength = 0;
@@ -414,14 +417,6 @@ class RunLengthIntegerWriterV2 implement
}
private void determineEncoding() {
- // used for direct encoding
- zigzagLiterals = new long[numLiterals];
-
- // used for patched base encoding
- baseRedLiterals = new long[numLiterals];
-
- // used for delta encoding
- adjDeltas = new long[numLiterals - 1];
int idx = 0;
@@ -530,10 +525,10 @@ class RunLengthIntegerWriterV2 implement
// is not significant then we can use direct or delta encoding
double p = 0.9;
- zzBits90p = utils.percentileBits(zigzagLiterals, p);
+ zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, p);
p = 1.0;
- zzBits100p = utils.percentileBits(zigzagLiterals, p);
+ zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, p);
int diffBitsLH = zzBits100p - zzBits90p;
@@ -543,18 +538,18 @@ class RunLengthIntegerWriterV2 implement
&& isFixedDelta == false) {
// patching is done only on base reduced values.
// remove base from literals
- for(int i = 0; i < zigzagLiterals.length; i++) {
+ for(int i = 0; i < numLiterals; i++) {
baseRedLiterals[i] = literals[i] - min;
}
// 95th percentile width is used to determine max allowed value
// after which patching will be done
p = 0.95;
- brBits95p = utils.percentileBits(baseRedLiterals, p);
+ brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, p);
// 100th percentile is used to compute the max patch width
p = 1.0;
- brBits100p = utils.percentileBits(baseRedLiterals, p);
+ brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, p);
// after base reducing the values, if the difference in bits between
// 95th percentile and 100th percentile value is zero then there
@@ -592,7 +587,7 @@ class RunLengthIntegerWriterV2 implement
// since we are considering only 95 percentile, the size of gap and
// patch array can contain only be 5% values
- patchLength = (int) Math.ceil((baseRedLiterals.length * 0.05));
+ patchLength = (int) Math.ceil((numLiterals * 0.05));
int[] gapList = new int[patchLength];
long[] patchList = new long[patchLength];
@@ -616,7 +611,7 @@ class RunLengthIntegerWriterV2 implement
int gap = 0;
int maxGap = 0;
- for(int i = 0; i < baseRedLiterals.length; i++) {
+ for(int i = 0; i < numLiterals; i++) {
// if value is above mask then create the patch and record the gap
if (baseRedLiterals[i] > mask) {
gap = i - prev;
@@ -694,9 +689,6 @@ class RunLengthIntegerWriterV2 implement
numLiterals = 0;
encoding = null;
prevDelta = 0;
- zigzagLiterals = null;
- baseRedLiterals = null;
- adjDeltas = null;
fixedDelta = 0;
zzBits90p = 0;
zzBits100p = 0;
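[Annotation] The RunLengthIntegerWriterV2 hunks replace per-run allocation (and nulling) of the three scratch arrays with arrays sized once to MAX_SCOPE (512 in this writer) and reused across runs, which is why every loop and writeInts/percentileBits call now bounds by numLiterals instead of array length. The fixed-capacity/logical-length pattern in miniature:

    final class ReusableLongBuffer {
      static final int MAX_SCOPE = 512;                // capacity, matching the writer
      private final long[] data = new long[MAX_SCOPE]; // allocated once, reused
      private int numLiterals = 0;                     // logical element count

      void add(long v) { data[numLiterals++] = v; }

      void reset() { numLiterals = 0; } // no reallocation; stale tail is never read

      long sum() {
        long s = 0;
        for (int i = 0; i < numLiterals; i++) { // bound by count, never data.length
          s += data[i];
        }
        return s;
      }
    }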
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java Tue Sep 2 19:56:56 2014
@@ -244,7 +244,7 @@ final class SerializationUtils {
* @param p - percentile value (>=0.0 to <=1.0)
* @return pth percentile bits
*/
- int percentileBits(long[] data, double p) {
+ int percentileBits(long[] data, int offset, int length, double p) {
if ((p > 1.0) || (p <= 0.0)) {
return -1;
}
@@ -254,13 +254,12 @@ final class SerializationUtils {
int[] hist = new int[32];
// compute the histogram
- for(long l : data) {
- int idx = encodeBitWidth(findClosestNumBits(l));
+ for(int i = offset; i < (offset + length); i++) {
+ int idx = encodeBitWidth(findClosestNumBits(data[i]));
hist[idx] += 1;
}
- int len = data.length;
- int perLen = (int) (len * (1.0 - p));
+ int perLen = (int) (length * (1.0 - p));
// return the bits required by pth percentile length
for(int i = hist.length - 1; i >= 0; i--) {
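The new (offset, length) parameters let percentileBits() run over the partially filled, reused arrays without copying. A self-contained approximation of the sliced histogram logic; the real method buckets widths through encodeBitWidth(findClosestNumBits(...)), which this sketch replaces with raw bit counts:

static int percentileBits(long[] data, int offset, int length, double p) {
  if ((p > 1.0) || (p <= 0.0)) {
    return -1;
  }
  int[] hist = new int[65];                  // bucket i counts values needing i bits
  for (int i = offset; i < offset + length; i++) {
    hist[64 - Long.numberOfLeadingZeros(data[i] | 1)] += 1;
  }
  // perLen is how many of the widest values the pth percentile may ignore.
  int perLen = (int) (length * (1.0 - p));
  for (int i = hist.length - 1; i >= 0; i--) {
    perLen -= hist[i];
    if (perLen < 0) {
      return i;                              // width that covers >= p of the values
    }
  }
  return 0;
}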
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Sep 2 19:56:56 2014
@@ -1317,6 +1317,7 @@ class WriterImpl implements Writer, Memo
Timestamp val =
((TimestampObjectInspector) inspector).
getPrimitiveJavaObject(obj);
+ indexStatistics.updateTimestamp(val);
seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP);
nanos.write(formatNanos(val.getNanos()));
}
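The added indexStatistics.updateTimestamp(val) call records the value into the column's min/max statistics, which pairs with the TIMESTAMP type added to PredicateLeaf at the end of this commit and presumably enables timestamp-based row skipping later. The surrounding lines show the on-disk split into whole seconds and nanos; a hypothetical illustration (BASE_TIMESTAMP is defined elsewhere in WriterImpl, a placeholder is used here):

import java.sql.Timestamp;

class TimestampSplitDemo {
  static final long MILLIS_PER_SECOND = 1000L;
  static final long BASE_TIMESTAMP = 0L;     // placeholder, not the real base epoch

  public static void main(String[] args) {
    Timestamp val = Timestamp.valueOf("2014-09-02 19:56:56.123456789");
    long seconds = (val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP;
    int nanos = val.getNanos();              // 123456789, stored separately
    System.out.println(seconds + "s + " + nanos + "ns");
  }
}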
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java Tue Sep 2 19:56:56 2014
@@ -13,6 +13,9 @@
*/
package org.apache.hadoop.hive.ql.io.parquet.convert;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;
@@ -30,7 +33,7 @@ public class ArrayWritableGroupConverter
private Writable[] mapPairContainer;
public ArrayWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent,
- final int index) {
+ final int index, List<TypeInfo> hiveSchemaTypeInfos) {
this.parent = parent;
this.index = index;
int count = groupType.getFieldCount();
@@ -40,7 +43,8 @@ public class ArrayWritableGroupConverter
isMap = count == 2;
converters = new Converter[count];
for (int i = 0; i < count; i++) {
- converters[i] = getConverterFromDescription(groupType.getType(i), i, this);
+ converters[i] = getConverterFromDescription(groupType.getType(i), i, this,
+ hiveSchemaTypeInfos);
}
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java Tue Sep 2 19:56:56 2014
@@ -16,6 +16,7 @@ package org.apache.hadoop.hive.ql.io.par
import java.util.ArrayList;
import java.util.List;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;
@@ -36,19 +37,21 @@ public class DataWritableGroupConverter
private final Object[] currentArr;
private Writable[] rootMap;
- public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema) {
- this(requestedSchema, null, 0, tableSchema);
+ public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema,
+ final List<TypeInfo> hiveSchemaTypeInfos) {
+ this(requestedSchema, null, 0, tableSchema, hiveSchemaTypeInfos);
final int fieldCount = tableSchema.getFieldCount();
this.rootMap = new Writable[fieldCount];
}
public DataWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent,
- final int index) {
- this(groupType, parent, index, groupType);
+ final int index, final List<TypeInfo> hiveSchemaTypeInfos) {
+ this(groupType, parent, index, groupType, hiveSchemaTypeInfos);
}
public DataWritableGroupConverter(final GroupType selectedGroupType,
- final HiveGroupConverter parent, final int index, final GroupType containingGroupType) {
+ final HiveGroupConverter parent, final int index, final GroupType containingGroupType,
+ final List<TypeInfo> hiveSchemaTypeInfos) {
this.parent = parent;
this.index = index;
final int totalFieldCount = containingGroupType.getFieldCount();
@@ -62,7 +65,8 @@ public class DataWritableGroupConverter
Type subtype = selectedFields.get(i);
if (containingGroupType.getFields().contains(subtype)) {
converters[i] = getConverterFromDescription(subtype,
- containingGroupType.getFieldIndex(subtype.getName()), this);
+ containingGroupType.getFieldIndex(subtype.getName()), this,
+ hiveSchemaTypeInfos);
} else {
throw new IllegalStateException("Group type [" + containingGroupType +
"] does not contain requested field: " + subtype);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java Tue Sep 2 19:56:56 2014
@@ -13,6 +13,9 @@
*/
package org.apache.hadoop.hive.ql.io.parquet.convert;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.ArrayWritable;
import parquet.io.api.GroupConverter;
@@ -28,8 +31,10 @@ public class DataWritableRecordConverter
private final DataWritableGroupConverter root;
- public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema) {
- this.root = new DataWritableGroupConverter(requestedSchema, tableSchema);
+ public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema,
+ final List<TypeInfo> hiveColumnTypeInfos) {
+ this.root = new DataWritableGroupConverter(requestedSchema, tableSchema,
+ hiveColumnTypeInfos);
}
@Override
@@ -41,4 +46,4 @@ public class DataWritableRecordConverter
public GroupConverter getRootConverter() {
return root;
}
-}
+}
\ No newline at end of file
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Tue Sep 2 19:56:56 2014
@@ -16,12 +16,19 @@ package org.apache.hadoop.hive.ql.io.par
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.ArrayList;
+import java.util.List;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
@@ -145,6 +152,32 @@ public enum ETypeConverter {
}
};
}
+ },
+ ECHAR_CONVERTER(HiveCharWritable.class) {
+ @Override
+ Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+ return new BinaryConverter<HiveCharWritable>(type, parent, index) {
+ @Override
+ protected HiveCharWritable convert(Binary binary) {
+ HiveChar hiveChar = new HiveChar();
+ hiveChar.setValue(binary.toStringUsingUTF8());
+ return new HiveCharWritable(hiveChar);
+ }
+ };
+ }
+ },
+ EVARCHAR_CONVERTER(HiveVarcharWritable.class) {
+ @Override
+ Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+ return new BinaryConverter<HiveVarcharWritable>(type, parent, index) {
+ @Override
+ protected HiveVarcharWritable convert(Binary binary) {
+ HiveVarchar hiveVarchar = new HiveVarchar();
+ hiveVarchar.setValue(binary.toStringUsingUTF8());
+ return new HiveVarcharWritable(hiveVarchar);
+ }
+ };
+ }
};
final Class<?> _type;
@@ -159,7 +192,8 @@ public enum ETypeConverter {
abstract Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent);
- public static Converter getNewConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+ public static Converter getNewConverter(final PrimitiveType type, final int index,
+ final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) {
if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
//TODO- cleanup once parquet supports Timestamp type annotation.
return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
@@ -167,7 +201,15 @@ public enum ETypeConverter {
if (OriginalType.DECIMAL == type.getOriginalType()) {
return EDECIMAL_CONVERTER.getConverter(type, index, parent);
} else if (OriginalType.UTF8 == type.getOriginalType()) {
- return ESTRING_CONVERTER.getConverter(type, index, parent);
+ if (hiveSchemaTypeInfos.get(index).getTypeName()
+ .startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ return ECHAR_CONVERTER.getConverter(type, index, parent);
+ } else if (hiveSchemaTypeInfos.get(index).getTypeName()
+ .startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ return EVARCHAR_CONVERTER.getConverter(type, index, parent);
+ } else if (type.isPrimitive()) {
+ return ESTRING_CONVERTER.getConverter(type, index, parent);
+ }
}
Class<?> javaType = type.getPrimitiveTypeName().javaType;
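A Parquet UTF8 binary looks identical whether the Hive column is string, char, or varchar, so the dispatch falls back to the Hive-side type name, and it uses startsWith because those names carry length parameters. A runnable demo of the matching (serdeConstants.CHAR_TYPE_NAME and VARCHAR_TYPE_NAME are inlined as the string constants they resolve to):

public class TypeNameDispatchDemo {
  static final String CHAR_TYPE_NAME = "char";
  static final String VARCHAR_TYPE_NAME = "varchar";

  static String pickConverter(String hiveTypeName) {
    // equals() would never match "char(10)"; the prefix check does,
    // and "varchar(20)" does not start with "char", so the order is safe.
    if (hiveTypeName.startsWith(CHAR_TYPE_NAME)) {
      return "ECHAR_CONVERTER";
    } else if (hiveTypeName.startsWith(VARCHAR_TYPE_NAME)) {
      return "EVARCHAR_CONVERTER";
    }
    return "ESTRING_CONVERTER";
  }

  public static void main(String[] args) {
    System.out.println(pickConverter("char(10)"));    // ECHAR_CONVERTER
    System.out.println(pickConverter("varchar(20)")); // EVARCHAR_CONVERTER
    System.out.println(pickConverter("string"));      // ESTRING_CONVERTER
  }
}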
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java Tue Sep 2 19:56:56 2014
@@ -13,6 +13,9 @@
*/
package org.apache.hadoop.hive.ql.io.parquet.convert;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.Writable;
import parquet.io.api.Converter;
@@ -23,17 +26,20 @@ import parquet.schema.Type.Repetition;
public abstract class HiveGroupConverter extends GroupConverter {
protected static Converter getConverterFromDescription(final Type type, final int index,
- final HiveGroupConverter parent) {
+ final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) {
if (type == null) {
return null;
}
if (type.isPrimitive()) {
- return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent);
+ return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent,
+ hiveSchemaTypeInfos);
} else {
if (type.asGroupType().getRepetition() == Repetition.REPEATED) {
- return new ArrayWritableGroupConverter(type.asGroupType(), parent, index);
+ return new ArrayWritableGroupConverter(type.asGroupType(), parent, index,
+ hiveSchemaTypeInfos);
} else {
- return new DataWritableGroupConverter(type.asGroupType(), parent, index);
+ return new DataWritableGroupConverter(type.asGroupType(), parent, index,
+ hiveSchemaTypeInfos);
}
}
}
@@ -42,4 +48,4 @@ public abstract class HiveGroupConverter
protected abstract void add(int index, Writable value);
-}
+}
\ No newline at end of file
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java Tue Sep 2 19:56:56 2014
@@ -16,6 +16,7 @@ package org.apache.hadoop.hive.ql.io.par
import java.util.List;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -25,7 +26,6 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import parquet.schema.ConversionPatterns;
-import parquet.schema.DecimalMetadata;
import parquet.schema.GroupType;
import parquet.schema.MessageType;
import parquet.schema.OriginalType;
@@ -81,13 +81,21 @@ public class HiveSchemaConverter {
return new PrimitiveType(repetition, PrimitiveTypeName.INT96, name);
} else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
throw new UnsupportedOperationException("Void type not implemented");
+ } else if (typeInfo.getTypeName().toLowerCase().startsWith(
+ serdeConstants.CHAR_TYPE_NAME)) {
+ return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
+ .named(name);
+ } else if (typeInfo.getTypeName().toLowerCase().startsWith(
+ serdeConstants.VARCHAR_TYPE_NAME)) {
+ return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
+ .named(name);
} else if (typeInfo instanceof DecimalTypeInfo) {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
int prec = decimalTypeInfo.precision();
int scale = decimalTypeInfo.scale();
int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).
- scale(scale).precision(prec).named(name);
+ scale(scale).precision(prec).named(name);
} else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
throw new UnsupportedOperationException("Unknown type not implemented");
} else {
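Both new branches produce the same Parquet type: char(n) and varchar(n) become an annotated binary, and the declared length n is not representable in the Parquet schema. A small sketch using the same parquet-mr builder calls as the hunk (parquet-mr must be on the classpath):

import parquet.schema.OriginalType;
import parquet.schema.PrimitiveType.PrimitiveTypeName;
import parquet.schema.Type;
import parquet.schema.Types;

public class CharSchemaDemo {
  public static void main(String[] args) {
    Type c = Types.optional(PrimitiveTypeName.BINARY)
        .as(OriginalType.UTF8).named("c");   // from a Hive char(10) column
    Type v = Types.optional(PrimitiveTypeName.BINARY)
        .as(OriginalType.UTF8).named("v");   // from a Hive varchar(20) column
    System.out.println(c);                   // e.g. "optional binary c (UTF8)"
    System.out.println(v);
  }
}

Because the length is lost here, the read path has to recover it from the Hive TypeInfos, which is why this commit threads hiveSchemaTypeInfos through all the converter constructors.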
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Tue Sep 2 19:56:56 2014
@@ -14,6 +14,7 @@
package org.apache.hadoop.hive.ql.io.parquet.read;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -23,6 +24,8 @@ import org.apache.hadoop.hive.ql.io.IOCo
import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.util.StringUtils;
@@ -60,6 +63,28 @@ public class DataWritableReadSupport ext
return (List<String>) VirtualColumn.
removeVirtualColumns(StringUtils.getStringCollection(columns));
}
+
+ private static List<TypeInfo> getColumnTypes(Configuration configuration) {
+
+ List<String> columnNames;
+ String columnNamesProperty = configuration.get(IOConstants.COLUMNS);
+ if (columnNamesProperty.length() == 0) {
+ columnNames = new ArrayList<String>();
+ } else {
+ columnNames = Arrays.asList(columnNamesProperty.split(","));
+ }
+ List<TypeInfo> columnTypes;
+ String columnTypesProperty = configuration.get(IOConstants.COLUMNS_TYPES);
+ if (columnTypesProperty.length() == 0) {
+ columnTypes = new ArrayList<TypeInfo>();
+ } else {
+ columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesProperty);
+ }
+
+ columnTypes = VirtualColumn.removeVirtualColumnTypes(columnNames, columnTypes);
+ return columnTypes;
+ }
+
/**
*
* It creates the readContext for Parquet side with the requested schema during the init phase.
@@ -115,7 +140,7 @@ public class DataWritableReadSupport ext
throw new IllegalStateException(msg);
}
}
- }
+ }
}
requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
typeListWanted), fileSchema, configuration);
@@ -148,7 +173,8 @@ public class DataWritableReadSupport ext
}
final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser.
parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
- return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
+ return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema,
+ getColumnTypes(configuration));
}
/**
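The new getColumnTypes() helper rebuilds the Hive TypeInfos from the job configuration so they can be handed to the record converter. A plain-string illustration of what it parses, assuming the usual comma-delimited columns property and colon-delimited columns.types property; the real code delegates to TypeInfoUtils.getTypeInfosFromTypeString(), which also handles nested types that a naive split would break:

import java.util.Arrays;
import java.util.List;

public class ColumnTypeParseDemo {
  public static void main(String[] args) {
    String columnsProp = "id,name,ts";               // IOConstants.COLUMNS
    String typesProp = "int:varchar(20):timestamp";  // IOConstants.COLUMNS_TYPES
    List<String> names = Arrays.asList(columnsProp.split(","));
    List<String> types = Arrays.asList(typesProp.split(":")); // flat types only
    System.out.println(names + " -> " + types);
  }
}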
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java Tue Sep 2 19:56:56 2014
@@ -25,12 +25,14 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.ArrayWritable;
/**
@@ -102,12 +104,10 @@ public class ArrayWritableObjectInspecto
return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
} else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
throw new UnsupportedOperationException("Parquet does not support date. See HIVE-6384");
- } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
- throw new UnsupportedOperationException("Parquet does not support decimal. See HIVE-6384");
} else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.CHAR_TYPE_NAME)) {
- throw new UnsupportedOperationException("Parquet does not support char. See HIVE-6384");
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((CharTypeInfo) typeInfo);
} else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
- throw new UnsupportedOperationException("Parquet does not support varchar. See HIVE-6384");
+ return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((VarcharTypeInfo) typeInfo);
} else {
throw new UnsupportedOperationException("Unknown field type: " + typeInfo);
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java Tue Sep 2 19:56:56 2014
@@ -42,6 +42,8 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
@@ -60,6 +62,7 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
+import parquet.io.api.Binary;
/**
*
@@ -280,6 +283,12 @@ public class ParquetHiveSerDe extends Ab
return new BytesWritable(tgt);
case TIMESTAMP:
return new TimestampWritable(((TimestampObjectInspector) inspector).getPrimitiveJavaObject(obj));
+ case CHAR:
+ String strippedValue = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(obj).getStrippedValue();
+ return new BytesWritable(Binary.fromString(strippedValue).getBytes());
+ case VARCHAR:
+ String value = ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(obj).getValue();
+ return new BytesWritable(Binary.fromString(value).getBytes());
default:
throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
}
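The two new write paths differ in one detail: CHAR strips its trailing pad spaces via getStrippedValue() before the value is turned into UTF-8 bytes, while VARCHAR is written as-is. A plain-string stand-in for the stripping behavior:

public class CharPaddingDemo {
  // Stands in for HiveChar.getStrippedValue(); not the real implementation.
  static String stripTrailingSpaces(String s) {
    int end = s.length();
    while (end > 0 && s.charAt(end - 1) == ' ') {
      end--;
    }
    return s.substring(0, end);
  }

  public static void main(String[] args) {
    String charValue = "abc       ";  // char(10) values are space-padded in memory
    System.out.println('[' + stripTrailingSpaces(charValue) + ']'); // prints [abc]
  }
}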
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java?rev=1622108&r1=1622107&r2=1622108&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java Tue Sep 2 19:56:56 2014
@@ -47,7 +47,8 @@ public interface PredicateLeaf {
FLOAT, // float and double
STRING, // string, char, varchar
DATE,
- DECIMAL
+ DECIMAL,
+ TIMESTAMP
}
/**