You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/07/31 00:22:46 UTC
svn commit: r1508669 [37/39] - in /hive/branches/vectorization: ./
common/src/java/org/apache/hadoop/hive/conf/ conf/
contrib/src/test/results/clientpositive/ data/files/ eclipse-templates/
hcatalog/build-support/ant/ hcatalog/core/src/main/java/org/ap...
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java Tue Jul 30 22:22:35 2013
@@ -18,9 +18,17 @@
package org.apache.hadoop.hive.serde2.avro;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
+import org.apache.avro.Schema.Type;
import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericData.Fixed;
import org.apache.avro.generic.GenericEnumSymbol;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -38,15 +46,6 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.io.Writable;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import static org.apache.avro.Schema.Type.BYTES;
-import static org.apache.avro.Schema.Type.FIXED;
-
class AvroSerializer {
private static final Log LOG = LogFactory.getLog(AvroSerializer.class);
@@ -67,12 +66,14 @@ class AvroSerializer {
GenericData.Record record = new GenericData.Record(schema);
List<? extends StructField> outputFieldRefs = soi.getAllStructFieldRefs();
- if(outputFieldRefs.size() != columnNames.size())
+ if(outputFieldRefs.size() != columnNames.size()) {
throw new AvroSerdeException("Number of input columns was different than output columns (in = " + columnNames.size() + " vs out = " + outputFieldRefs.size());
+ }
int size = schema.getFields().size();
- if(outputFieldRefs.size() != size) // Hive does this check for us, so we should be ok.
+ if(outputFieldRefs.size() != size) {
throw new AvroSerdeException("Hive passed in a different number of fields than the schema expected: (Hive wanted " + outputFieldRefs.size() +", Avro expected " + schema.getFields().size());
+ }
List<? extends StructField> allStructFieldRefs = soi.getAllStructFieldRefs();
List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);
@@ -88,8 +89,9 @@ class AvroSerializer {
record.put(field.name(), val);
}
- if(!GenericData.get().validate(schema, record))
+ if(!GenericData.get().validate(schema, record)) {
throw new SerializeToAvroException(schema, record);
+ }
cache.setRecord(record);
@@ -111,7 +113,7 @@ class AvroSerializer {
switch(typeInfo.getCategory()) {
case PRIMITIVE:
assert fieldOI instanceof PrimitiveObjectInspector;
- return serializePrimitive(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData);
+ return serializePrimitive(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData, schema);
case MAP:
assert fieldOI instanceof MapObjectInspector;
assert typeInfo instanceof MapTypeInfo;
@@ -153,7 +155,7 @@ class AvroSerializer {
};
private Object serializeEnum(TypeInfo typeInfo, PrimitiveObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
- return enums.retrieve(schema).retrieve(serializePrimitive(typeInfo, fieldOI, structFieldData));
+ return enums.retrieve(schema).retrieve(serializePrimitive(typeInfo, fieldOI, structFieldData, schema));
}
private Object serializeStruct(StructTypeInfo typeInfo, StructObjectInspector ssoi, Object o, Schema schema) throws AvroSerdeException {
@@ -176,14 +178,24 @@ class AvroSerializer {
return record;
}
- private Object serializePrimitive(TypeInfo typeInfo, PrimitiveObjectInspector fieldOI, Object structFieldData) throws AvroSerdeException {
+ private Object serializePrimitive(TypeInfo typeInfo, PrimitiveObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
switch(fieldOI.getPrimitiveCategory()) {
- case UNKNOWN:
- throw new AvroSerdeException("Received UNKNOWN primitive category.");
- case VOID:
- return null;
- default: // All other primitive types are simple
- return fieldOI.getPrimitiveJavaObject(structFieldData);
+ case BINARY:
+ if (schema.getType() == Type.BYTES){
+ ByteBuffer bb = ByteBuffer.wrap((byte[])fieldOI.getPrimitiveJavaObject(structFieldData));
+ return bb.rewind();
+ } else if (schema.getType() == Type.FIXED){
+ Fixed fixed = new GenericData.Fixed(schema, (byte[])fieldOI.getPrimitiveJavaObject(structFieldData));
+ return fixed;
+ } else {
+ throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + schema.getType());
+ }
+ case UNKNOWN:
+ throw new AvroSerdeException("Received UNKNOWN primitive category.");
+ case VOID:
+ return null;
+ default: // All other primitive types are simple
+ return fieldOI.getPrimitiveJavaObject(structFieldData);
}
}
@@ -197,53 +209,7 @@ class AvroSerializer {
schema.getTypes().get(tag));
}
- // We treat FIXED and BYTES as arrays of tinyints within Hive. Check
- // if we're dealing with either of these types and thus need to serialize
- // them as their Avro types.
- private boolean isTransformedType(Schema schema) {
- return schema.getType().equals(FIXED) || schema.getType().equals(BYTES);
- }
-
- private Object serializeTransformedType(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
- if(LOG.isDebugEnabled()) {
- LOG.debug("Beginning to transform " + typeInfo + " with Avro schema " + schema.toString(false));
- }
- if(schema.getType().equals(FIXED)) return serializedAvroFixed(typeInfo, fieldOI, structFieldData, schema);
- else return serializeAvroBytes(typeInfo, fieldOI, structFieldData, schema);
-
- }
-
- private Object serializeAvroBytes(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
- ByteBuffer bb = ByteBuffer.wrap(extraByteArray(fieldOI, structFieldData));
- return bb.rewind();
- }
-
- private Object serializedAvroFixed(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
- return new GenericData.Fixed(schema, extraByteArray(fieldOI, structFieldData));
- }
-
- // For transforming to BYTES and FIXED, pull out the byte array Avro will want
- private byte[] extraByteArray(ListObjectInspector fieldOI, Object structFieldData) throws AvroSerdeException {
- // Grab a book. This is going to be slow.
- int listLength = fieldOI.getListLength(structFieldData);
- byte[] bytes = new byte[listLength];
- assert fieldOI.getListElementObjectInspector() instanceof PrimitiveObjectInspector;
- PrimitiveObjectInspector poi = (PrimitiveObjectInspector)fieldOI.getListElementObjectInspector();
- List<?> list = fieldOI.getList(structFieldData);
-
- for(int i = 0; i < listLength; i++) {
- Object b = poi.getPrimitiveJavaObject(list.get(i));
- if(!(b instanceof Byte))
- throw new AvroSerdeException("Attempting to transform to bytes, element was not byte but " + b.getClass().getCanonicalName());
- bytes[i] = (Byte)b;
- }
- return bytes;
- }
-
private Object serializeList(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
- if(isTransformedType(schema))
- return serializeTransformedType(typeInfo, fieldOI, structFieldData, schema);
-
List<?> list = fieldOI.getList(structFieldData);
List<Object> deserialized = new ArrayList<Object>(list.size());
@@ -260,8 +226,9 @@ class AvroSerializer {
private Object serializeMap(MapTypeInfo typeInfo, MapObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
// Avro only allows maps with string keys
- if(!mapHasStringKey(fieldOI.getMapKeyObjectInspector()))
+ if(!mapHasStringKey(fieldOI.getMapKeyObjectInspector())) {
throw new AvroSerdeException("Avro only supports maps with keys as Strings. Current Map is: " + typeInfo.toString());
+ }
ObjectInspector mapKeyObjectInspector = fieldOI.getMapKeyObjectInspector();
ObjectInspector mapValueObjectInspector = fieldOI.getMapValueObjectInspector();
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java Tue Jul 30 22:22:35 2013
@@ -17,24 +17,26 @@
*/
package org.apache.hadoop.hive.serde2.avro;
-import org.apache.avro.Schema;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.Map;
-
import static org.apache.avro.Schema.Type.BOOLEAN;
+import static org.apache.avro.Schema.Type.BYTES;
import static org.apache.avro.Schema.Type.DOUBLE;
+import static org.apache.avro.Schema.Type.FIXED;
import static org.apache.avro.Schema.Type.FLOAT;
import static org.apache.avro.Schema.Type.INT;
import static org.apache.avro.Schema.Type.LONG;
import static org.apache.avro.Schema.Type.NULL;
import static org.apache.avro.Schema.Type.STRING;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
/**
* Convert an Avro Schema to a Hive TypeInfo
*/
@@ -47,7 +49,8 @@ class SchemaToTypeInfo {
// long bigint check
// float double check
// double double check
- // bytes
+ // bytes binary check
+ // fixed binary check
// string string check
// tinyint
// smallint
@@ -56,13 +59,15 @@ class SchemaToTypeInfo {
private static final Map<Schema.Type, TypeInfo> primitiveTypeToTypeInfo = initTypeMap();
private static Map<Schema.Type, TypeInfo> initTypeMap() {
Map<Schema.Type, TypeInfo> theMap = new Hashtable<Schema.Type, TypeInfo>();
- theMap.put(STRING, TypeInfoFactory.getPrimitiveTypeInfo("string"));
- theMap.put(INT, TypeInfoFactory.getPrimitiveTypeInfo("int"));
+ theMap.put(NULL, TypeInfoFactory.getPrimitiveTypeInfo("void"));
theMap.put(BOOLEAN, TypeInfoFactory.getPrimitiveTypeInfo("boolean"));
+ theMap.put(INT, TypeInfoFactory.getPrimitiveTypeInfo("int"));
theMap.put(LONG, TypeInfoFactory.getPrimitiveTypeInfo("bigint"));
theMap.put(FLOAT, TypeInfoFactory.getPrimitiveTypeInfo("float"));
theMap.put(DOUBLE, TypeInfoFactory.getPrimitiveTypeInfo("double"));
- theMap.put(NULL, TypeInfoFactory.getPrimitiveTypeInfo("void"));
+ theMap.put(BYTES, TypeInfoFactory.getPrimitiveTypeInfo("binary"));
+ theMap.put(FIXED, TypeInfoFactory.getPrimitiveTypeInfo("binary"));
+ theMap.put(STRING, TypeInfoFactory.getPrimitiveTypeInfo("string"));
return Collections.unmodifiableMap(theMap);
}
@@ -106,22 +111,22 @@ class SchemaToTypeInfo {
private static TypeInfo generateTypeInfoWorker(Schema schema) throws AvroSerdeException {
// Avro requires NULLable types to be defined as unions of some type T
// and NULL. This is annoying and we're going to hide it from the user.
- if(AvroSerdeUtils.isNullableType(schema))
+ if(AvroSerdeUtils.isNullableType(schema)) {
return generateTypeInfo(AvroSerdeUtils.getOtherTypeFromNullableType(schema));
+ }
Schema.Type type = schema.getType();
- if(primitiveTypeToTypeInfo.containsKey(type))
+ if(primitiveTypeToTypeInfo.containsKey(type)) {
return primitiveTypeToTypeInfo.get(type);
+ }
switch(type) {
- case BYTES: return generateBytesTypeInfo(schema);
case RECORD: return generateRecordTypeInfo(schema);
case MAP: return generateMapTypeInfo(schema);
case ARRAY: return generateArrayTypeInfo(schema);
case UNION: return generateUnionTypeInfo(schema);
case ENUM: return generateEnumTypeInfo(schema);
- case FIXED: return generateFixedTypeInfo(schema);
default: throw new AvroSerdeException("Do not yet support: " + schema);
}
}
@@ -183,22 +188,4 @@ class SchemaToTypeInfo {
return TypeInfoFactory.getPrimitiveTypeInfo("string");
}
-
- // Hive doesn't have a Fixed type, so we're going to treat them as arrays of
- // bytes
- // TODO: Make note in documentation that Hive sends these out as signed bytes.
- private static final TypeInfo FIXED_AND_BYTES_EQUIV =
- TypeInfoFactory.getListTypeInfo(TypeInfoFactory.byteTypeInfo);
- private static TypeInfo generateFixedTypeInfo(Schema schema) {
- assert schema.getType().equals(Schema.Type.FIXED);
-
- return FIXED_AND_BYTES_EQUIV;
- }
-
- // Avro considers bytes to be a primitive type, but Hive doesn't. We'll
- // convert them to a list of bytes, just like Fixed. Sigh.
- private static TypeInfo generateBytesTypeInfo(Schema schema) {
- assert schema.getType().equals(Schema.Type.BYTES);
- return FIXED_AND_BYTES_EQUIV;
- }
}
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java Tue Jul 30 22:22:35 2013
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.serde2.Abs
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
@@ -52,6 +53,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
@@ -225,11 +227,7 @@ public class BinarySortableSerDe extends
}
case INT: {
IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
- int v = buffer.read(invert) ^ 0x80;
- for (int i = 0; i < 3; i++) {
- v = (v << 8) + (buffer.read(invert) & 0xff);
- }
- r.set(v);
+ r.set(deserializeInt(buffer, invert));
return r;
}
case LONG: {
@@ -368,10 +366,17 @@ public class BinarySortableSerDe extends
return bw;
}
+ case DATE: {
+ DateWritable d = reuse == null ? new DateWritable()
+ : (DateWritable) reuse;
+ d.set(deserializeInt(buffer, invert));
+ return d;
+ }
+
case TIMESTAMP:
TimestampWritable t = (reuse == null ? new TimestampWritable() :
(TimestampWritable) reuse);
- byte[] bytes = new byte[8];
+ byte[] bytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];
for (int i = 0; i < bytes.length; i++) {
bytes[i] = buffer.read(invert);
@@ -539,6 +544,14 @@ public class BinarySortableSerDe extends
}
}
+ private static int deserializeInt(InputByteBuffer buffer, boolean invert) throws IOException {
+ int v = buffer.read(invert) ^ 0x80;
+ for (int i = 0; i < 3; i++) {
+ v = (v << 8) + (buffer.read(invert) & 0xff);
+ }
+ return v;
+ }
+
BytesWritable serializeBytesWritable = new BytesWritable();
OutputByteBuffer outputByteBuffer = new OutputByteBuffer();
@@ -596,10 +609,7 @@ public class BinarySortableSerDe extends
case INT: {
IntObjectInspector ioi = (IntObjectInspector) poi;
int v = ioi.get(o);
- buffer.write((byte) ((v >> 24) ^ 0x80), invert);
- buffer.write((byte) (v >> 16), invert);
- buffer.write((byte) (v >> 8), invert);
- buffer.write((byte) v, invert);
+ serializeInt(buffer, v, invert);
return;
}
case LONG: {
@@ -666,6 +676,12 @@ public class BinarySortableSerDe extends
serializeBytes(buffer, toSer, ba.getLength(), invert);
return;
}
+ case DATE: {
+ DateObjectInspector doi = (DateObjectInspector) poi;
+ int v = doi.getPrimitiveWritableObject(o).getDays();
+ serializeInt(buffer, v, invert);
+ return;
+ }
case TIMESTAMP: {
TimestampObjectInspector toi = (TimestampObjectInspector) poi;
TimestampWritable t = toi.getPrimitiveWritableObject(o);
@@ -788,6 +804,14 @@ public class BinarySortableSerDe extends
}
buffer.write((byte) 0, invert);
}
+
+ private static void serializeInt(OutputByteBuffer buffer, int v, boolean invert) {
+ buffer.write((byte) ((v >> 24) ^ 0x80), invert);
+ buffer.write((byte) (v >> 16), invert);
+ buffer.write((byte) (v >> 8), invert);
+ buffer.write((byte) v, invert);
+ }
+
@Override
public SerDeStats getSerDeStats() {
// no support for statistics
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java Tue Jul 30 22:22:35 2013
@@ -25,7 +25,6 @@ import java.math.BigDecimal;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
-import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -59,8 +58,17 @@ public class TimestampWritable implement
static final public byte[] nullBytes = {0x0, 0x0, 0x0, 0x0};
- private static final int NO_DECIMAL_MASK = 0x7FFFFFFF;
- private static final int HAS_DECIMAL_MASK = 0x80000000;
+ private static final int DECIMAL_OR_SECOND_VINT_FLAG = 0x80000000;
+ private static final int LOWEST_31_BITS_OF_SEC_MASK = 0x7fffffff;
+
+ private static final long SEVEN_BYTE_LONG_SIGN_FLIP = 0xff80L << 48;
+
+ private static final BigDecimal BILLION_BIG_DECIMAL = BigDecimal.valueOf(1000000000);
+
+ /** The maximum number of bytes required for a TimestampWritable */
+ public static final int MAX_BYTES = 13;
+
+ public static final int BINARY_SORTABLE_LENGTH = 11;
private static final ThreadLocal<DateFormat> threadLocalDateFormat =
new ThreadLocal<DateFormat>() {
@@ -82,16 +90,12 @@ public class TimestampWritable implement
/* Allow use of external byte[] for efficiency */
private byte[] currentBytes;
- private final byte[] internalBytes = new byte[9];
+ private final byte[] internalBytes = new byte[MAX_BYTES];
private byte[] externalBytes;
private int offset;
- /* Reused to read VInts */
- static private final VInt vInt = new VInt();
-
/* Constructors */
public TimestampWritable() {
- Arrays.fill(internalBytes, (byte) 0x0);
bytesEmpty = false;
currentBytes = internalBytes;
offset = 0;
@@ -156,11 +160,14 @@ public class TimestampWritable implement
*
* @return seconds corresponding to this TimestampWritable
*/
- public int getSeconds() {
- if (bytesEmpty) {
- return (int) (timestamp.getTime() / 1000);
+ public long getSeconds() {
+ if (!timestampEmpty) {
+ return millisToSeconds(timestamp.getTime());
+ } else if (!bytesEmpty) {
+ return TimestampWritable.getSeconds(currentBytes, offset);
+ } else {
+ throw new IllegalStateException("Both timestamp and bytes are empty");
}
- return TimestampWritable.getSeconds(currentBytes, offset);
}
/**
@@ -170,26 +177,33 @@ public class TimestampWritable implement
public int getNanos() {
if (!timestampEmpty) {
return timestamp.getNanos();
+ } else if (!bytesEmpty) {
+ return hasDecimalOrSecondVInt() ?
+ TimestampWritable.getNanos(currentBytes, offset + 4) : 0;
+ } else {
+ throw new IllegalStateException("Both timestamp and bytes are empty");
}
-
- return hasDecimal() ? TimestampWritable.getNanos(currentBytes, offset+4) : 0;
}
/**
- *
- * @return length of serialized TimestampWritable data
+ * @return length of serialized TimestampWritable data. As a side effect, populates the internal
+ * byte array if empty.
*/
- private int getTotalLength() {
- return 4 + getDecimalLength();
+ int getTotalLength() {
+ checkBytes();
+ return getTotalLength(currentBytes, offset);
}
- /**
- *
- * @return number of bytes the variable length decimal takes up
- */
- private int getDecimalLength() {
- checkBytes();
- return hasDecimal() ? WritableUtils.decodeVIntSize(currentBytes[offset+4]) : 0;
+ public static int getTotalLength(byte[] bytes, int offset) {
+ int len = 4;
+ if (hasDecimalOrSecondVInt(bytes[offset])) {
+ int firstVIntLen = WritableUtils.decodeVIntSize(bytes[offset + 4]);
+ len += firstVIntLen;
+ if (hasSecondVInt(bytes[offset + 4])) {
+ len += WritableUtils.decodeVIntSize(bytes[offset + 4 + firstVIntLen]);
+ }
+ }
+ return len;
}
public Timestamp getTimestamp() {
@@ -215,33 +229,45 @@ public class TimestampWritable implement
/**
* @return byte[] representation of TimestampWritable that is binary
- * sortable (4 byte seconds, 4 bytes for nanoseconds)
+ * sortable (7 bytes for seconds, 4 bytes for nanoseconds)
*/
public byte[] getBinarySortable() {
- byte[] b = new byte[8];
+ byte[] b = new byte[BINARY_SORTABLE_LENGTH];
int nanos = getNanos();
- int seconds = HAS_DECIMAL_MASK | getSeconds();
- intToBytes(seconds, b, 0);
- intToBytes(nanos, b, 4);
+ // We flip the highest-order bit of the seven-byte representation of seconds to make negative
+ // values come before positive ones.
+ long seconds = getSeconds() ^ SEVEN_BYTE_LONG_SIGN_FLIP;
+ sevenByteLongToBytes(seconds, b, 0);
+ intToBytes(nanos, b, 7);
return b;
}
/**
* Given a byte[] that has binary sortable data, initialize the internal
* structures to hold that data
- * @param bytes
- * @param offset
+ * @param bytes the byte array that holds the binary sortable representation
+ * @param binSortOffset offset of the binary-sortable representation within the buffer.
*/
- public void setBinarySortable(byte[] bytes, int offset) {
- int seconds = bytesToInt(bytes, offset);
- int nanos = bytesToInt(bytes, offset+4);
- if (nanos == 0) {
- seconds &= NO_DECIMAL_MASK;
+ public void setBinarySortable(byte[] bytes, int binSortOffset) {
+ // Flip the sign bit (and unused bits of the high-order byte) of the seven-byte long back.
+ long seconds = readSevenByteLong(bytes, binSortOffset) ^ SEVEN_BYTE_LONG_SIGN_FLIP;
+ int nanos = bytesToInt(bytes, binSortOffset + 7);
+ int firstInt = (int) seconds;
+ boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE;
+ if (nanos != 0 || hasSecondVInt) {
+ firstInt |= DECIMAL_OR_SECOND_VINT_FLAG;
} else {
- seconds |= HAS_DECIMAL_MASK;
+ firstInt &= LOWEST_31_BITS_OF_SEC_MASK;
}
- intToBytes(seconds, internalBytes, 0);
- setNanosBytes(nanos, internalBytes, 4);
+
+ intToBytes(firstInt, internalBytes, 0);
+ setNanosBytes(nanos, internalBytes, 4, hasSecondVInt);
+ if (hasSecondVInt) {
+ LazyBinaryUtils.writeVLongToByteArray(internalBytes,
+ 4 + WritableUtils.decodeVIntSize(internalBytes[4]),
+ seconds >> 31);
+ }
+
currentBytes = internalBytes;
this.offset = 0;
}
@@ -268,7 +294,7 @@ public class TimestampWritable implement
public double getDouble() {
double seconds, nanos;
if (bytesEmpty) {
- seconds = timestamp.getTime() / 1000;
+ seconds = millisToSeconds(timestamp.getTime());
nanos = timestamp.getNanos();
} else {
seconds = getSeconds();
@@ -281,10 +307,31 @@ public class TimestampWritable implement
public void readFields(DataInput in) throws IOException {
in.readFully(internalBytes, 0, 4);
- if (TimestampWritable.hasDecimal(internalBytes[0])) {
+ if (TimestampWritable.hasDecimalOrSecondVInt(internalBytes[0])) {
in.readFully(internalBytes, 4, 1);
int len = (byte) WritableUtils.decodeVIntSize(internalBytes[4]);
- in.readFully(internalBytes, 5, len-1);
+ if (len > 1) {
+ in.readFully(internalBytes, 5, len-1);
+ }
+
+ long vlong = LazyBinaryUtils.readVLongFromByteArray(internalBytes, 4);
+ if (vlong < -1000000000 || vlong > 999999999) {
+ throw new IOException(
+ "Invalid first vint value (encoded nanoseconds) of a TimestampWritable: " + vlong +
+ ", expected to be between -1000000000 and 999999999.");
+ // Note that -1000000000 is a valid value corresponding to a nanosecond timestamp
+ // of 999999999, because if the second VInt is present, we use the value
+ // (-reversedNanoseconds - 1) as the first VInt.
+ }
+ if (vlong < 0) {
+ // This indicates there is a second VInt containing the additional bits of the seconds
+ // field.
+ in.readFully(internalBytes, 4 + len, 1);
+ int secondVIntLen = (byte) WritableUtils.decodeVIntSize(internalBytes[4 + len]);
+ if (secondVIntLen > 1) {
+ in.readFully(internalBytes, 5 + len, secondVIntLen - 1);
+ }
+ }
}
currentBytes = internalBytes;
this.offset = 0;
@@ -301,8 +348,8 @@ public class TimestampWritable implement
public int compareTo(TimestampWritable t) {
checkBytes();
- int s1 = this.getSeconds();
- int s2 = t.getSeconds();
+ long s1 = this.getSeconds();
+ long s2 = t.getSeconds();
if (s1 == s2) {
int n1 = this.getNanos();
int n2 = t.getNanos();
@@ -311,7 +358,7 @@ public class TimestampWritable implement
}
return n1 - n2;
} else {
- return s1 - s2;
+ return s1 < s2 ? -1 : 1;
}
}
@@ -342,7 +389,7 @@ public class TimestampWritable implement
@Override
public int hashCode() {
long seconds = getSeconds();
- seconds <<= 32;
+ seconds <<= 30; // the nanosecond part fits in 30 bits
seconds |= getNanos();
return (int) ((seconds >>> 32) ^ seconds);
}
@@ -362,13 +409,30 @@ public class TimestampWritable implement
* @param offset
* @return the number of seconds
*/
- public static int getSeconds(byte[] bytes, int offset) {
- return NO_DECIMAL_MASK & bytesToInt(bytes, offset);
+ public static long getSeconds(byte[] bytes, int offset) {
+ int lowest31BitsOfSecondsAndFlag = bytesToInt(bytes, offset);
+ if (lowest31BitsOfSecondsAndFlag >= 0 || // the "has decimal or second VInt" flag is not set
+ !hasSecondVInt(bytes[offset + 4])) {
+ // The entire seconds field is stored in the first 4 bytes.
+ return lowest31BitsOfSecondsAndFlag & LOWEST_31_BITS_OF_SEC_MASK;
+ }
+
+ // We compose the seconds field from two parts. The lowest 31 bits come from the first four
+ // bytes. The higher-order bits come from the second VInt that follows the nanos field.
+ return ((long) (lowest31BitsOfSecondsAndFlag & LOWEST_31_BITS_OF_SEC_MASK)) |
+ (LazyBinaryUtils.readVLongFromByteArray(bytes,
+ offset + 4 + WritableUtils.decodeVIntSize(bytes[offset + 4])) << 31);
}
public static int getNanos(byte[] bytes, int offset) {
+ VInt vInt = LazyBinaryUtils.threadLocalVInt.get();
LazyBinaryUtils.readVInt(bytes, offset, vInt);
int val = vInt.value;
+ if (val < 0) {
+ // This means there is a second VInt present that specifies additional bits of the timestamp.
+ // The reversed nanoseconds value is still encoded in this VInt.
+ val = -val - 1;
+ }
int len = (int) Math.floor(Math.log10(val)) + 1;
// Reverse the value
@@ -387,40 +451,33 @@ public class TimestampWritable implement
}
/**
- * Writes a Timestamp's serialized value to byte array b at
- * @param t
- * @param b
+ * Writes a Timestamp's serialized value to byte array b at the given offset
+ * @param t timestamp to convert to bytes
+ * @param b destination byte array
+ * @param offset destination offset in the byte array
*/
public static void convertTimestampToBytes(Timestamp t, byte[] b,
int offset) {
- if (b.length < 9) {
- LOG.error("byte array too short");
- }
long millis = t.getTime();
int nanos = t.getNanos();
- boolean hasDecimal = nanos != 0 && setNanosBytes(nanos, b, offset+4);
- setSecondsBytes(millis, b, offset, hasDecimal);
- }
-
- /**
- * Given an integer representing seconds, write its serialized
- * value to the byte array b at offset
- * @param millis
- * @param b
- * @param offset
- * @param hasDecimal
- */
- private static void setSecondsBytes(long millis, byte[] b, int offset, boolean hasDecimal) {
- int seconds = (int) (millis / 1000);
-
- if (!hasDecimal) {
- seconds &= NO_DECIMAL_MASK;
+ long seconds = millisToSeconds(millis);
+ boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE;
+ boolean hasDecimal = setNanosBytes(nanos, b, offset+4, hasSecondVInt);
+
+ int firstInt = (int) seconds;
+ if (hasDecimal || hasSecondVInt) {
+ firstInt |= DECIMAL_OR_SECOND_VINT_FLAG;
} else {
- seconds |= HAS_DECIMAL_MASK;
+ firstInt &= LOWEST_31_BITS_OF_SEC_MASK;
}
+ intToBytes(firstInt, b, offset);
- intToBytes(seconds, b, offset);
+ if (hasSecondVInt) {
+ LazyBinaryUtils.writeVLongToByteArray(b,
+ offset + 4 + WritableUtils.decodeVIntSize(b[offset + 4]),
+ seconds >> 31);
+ }
}
/**
@@ -432,7 +489,7 @@ public class TimestampWritable implement
* @param offset
* @return
*/
- private static boolean setNanosBytes(int nanos, byte[] b, int offset) {
+ private static boolean setNanosBytes(int nanos, byte[] b, int offset, boolean hasSecondVInt) {
int decimal = 0;
if (nanos != 0) {
int counter = 0;
@@ -444,7 +501,11 @@ public class TimestampWritable implement
}
}
- LazyBinaryUtils.writeVLongToByteArray(b, offset, decimal);
+ if (hasSecondVInt || decimal != 0) {
+ // We use the sign of the reversed-nanoseconds field to indicate that there is a second VInt
+ // present.
+ LazyBinaryUtils.writeVLongToByteArray(b, offset, hasSecondVInt ? (-decimal - 1) : decimal);
+ }
return decimal != 0;
}
@@ -458,11 +519,14 @@ public class TimestampWritable implement
}
public static Timestamp decimalToTimestamp(HiveDecimal d) {
- BigDecimal seconds = new BigDecimal(d.longValue());
- long millis = d.bigDecimalValue().multiply(new BigDecimal(1000)).longValue();
- int nanos = d.bigDecimalValue().subtract(seconds).multiply(new BigDecimal(1000000000)).intValue();
-
- Timestamp t = new Timestamp(millis);
+ BigDecimal nanoInstant = d.bigDecimalValue().multiply(BILLION_BIG_DECIMAL);
+ int nanos = nanoInstant.remainder(BILLION_BIG_DECIMAL).intValue();
+ if (nanos < 0) {
+ nanos += 1000000000;
+ }
+ long seconds =
+ nanoInstant.subtract(new BigDecimal(nanos)).divide(BILLION_BIG_DECIMAL).longValue();
+ Timestamp t = new Timestamp(seconds * 1000);
t.setNanos(nanos);
return t;
@@ -480,6 +544,10 @@ public class TimestampWritable implement
// Convert to millis
long millis = seconds * 1000;
+ if (nanos < 0) {
+ millis -= 1000;
+ nanos += 1000000000;
+ }
Timestamp t = new Timestamp(millis);
// Set remaining fractional portion to nanos
@@ -488,10 +556,19 @@ public class TimestampWritable implement
}
public static void setTimestamp(Timestamp t, byte[] bytes, int offset) {
- boolean hasDecimal = hasDecimal(bytes[offset]);
- t.setTime(((long) TimestampWritable.getSeconds(bytes, offset)) * 1000);
- if (hasDecimal) {
- t.setNanos(TimestampWritable.getNanos(bytes, offset+4));
+ boolean hasDecimalOrSecondVInt = hasDecimalOrSecondVInt(bytes[offset]);
+ long seconds = (long) TimestampWritable.getSeconds(bytes, offset);
+ int nanos = 0;
+ if (hasDecimalOrSecondVInt) {
+ nanos = TimestampWritable.getNanos(bytes, offset + 4);
+ if (hasSecondVInt(bytes[offset + 4])) {
+ seconds += LazyBinaryUtils.readVLongFromByteArray(bytes,
+ offset + 4 + WritableUtils.decodeVIntSize(bytes[offset + 4]));
+ }
+ }
+ t.setTime(seconds * 1000);
+ if (nanos != 0) {
+ t.setNanos(nanos);
}
}
@@ -501,17 +578,22 @@ public class TimestampWritable implement
return t;
}
- public boolean hasDecimal() {
- return hasDecimal(currentBytes[offset]);
+ private static boolean hasDecimalOrSecondVInt(byte b) {
+ return (b >> 7) != 0;
}
- /**
- *
- * @param b first byte in an encoded TimestampWritable
- * @return true if it has a decimal portion, false otherwise
- */
- public static boolean hasDecimal(byte b) {
- return (b >> 7) != 0;
+ private static boolean hasSecondVInt(byte b) {
+ return WritableUtils.isNegativeVInt(b);
+ }
+
+ private final boolean hasDecimalOrSecondVInt() {
+ return hasDecimalOrSecondVInt(currentBytes[offset]);
+ }
+
+ public final boolean hasDecimal() {
+ return hasDecimalOrSecondVInt() || currentBytes[offset + 4] != -1;
+ // If the first byte of the VInt is -1, the VInt itself is -1, indicating that there is a
+ // second VInt but the nanoseconds field is actually 0.
}
/**
@@ -528,6 +610,20 @@ public class TimestampWritable implement
}
/**
+ * Writes <code>value</code> into <code>dest</code> at <code>offset</code> as a seven-byte
+ * serialized long number.
+ */
+ static void sevenByteLongToBytes(long value, byte[] dest, int offset) {
+ dest[offset] = (byte) ((value >> 48) & 0xFF);
+ dest[offset+1] = (byte) ((value >> 40) & 0xFF);
+ dest[offset+2] = (byte) ((value >> 32) & 0xFF);
+ dest[offset+3] = (byte) ((value >> 24) & 0xFF);
+ dest[offset+4] = (byte) ((value >> 16) & 0xFF);
+ dest[offset+5] = (byte) ((value >> 8) & 0xFF);
+ dest[offset+6] = (byte) (value & 0xFF);
+ }
+
+ /**
*
* @param bytes
* @param offset
@@ -540,4 +636,27 @@ public class TimestampWritable implement
| ((0xFF & bytes[offset+2]) << 8)
| (0xFF & bytes[offset+3]);
}
+
+ static long readSevenByteLong(byte[] bytes, int offset) {
+ // We need to shift everything 8 bits left and then shift back to populate the sign field.
+ return (((0xFFL & bytes[offset]) << 56)
+ | ((0xFFL & bytes[offset+1]) << 48)
+ | ((0xFFL & bytes[offset+2]) << 40)
+ | ((0xFFL & bytes[offset+3]) << 32)
+ | ((0xFFL & bytes[offset+4]) << 24)
+ | ((0xFFL & bytes[offset+5]) << 16)
+ | ((0xFFL & bytes[offset+6]) << 8)) >> 8;
+ }
+
+ /**
+ * Rounds the number of milliseconds relative to the epoch down to the nearest whole number of
+ * seconds. 500 would round to 0, -500 would round to -1.
+ */
+ static long millisToSeconds(long millis) {
+ if (millis >= 0) {
+ return millis / 1000;
+ } else {
+ return (millis - 999) / 1000;
+ }
+ }
}
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java Tue Jul 30 22:22:35 2013
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.serde2.laz
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDateObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyTimestampObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyVoidObjectInspector;
import org.apache.hadoop.hive.serde2.lazydio.LazyDioBoolean;
@@ -111,6 +112,8 @@ public final class LazyFactory {
return new LazyDouble((LazyDoubleObjectInspector) oi);
case STRING:
return new LazyString((LazyStringObjectInspector) oi);
+ case DATE:
+ return new LazyDate((LazyDateObjectInspector) oi);
case TIMESTAMP:
return new LazyTimestamp((LazyTimestampObjectInspector) oi);
case BINARY:
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java Tue Jul 30 22:22:35 2013
@@ -43,6 +43,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BytesWritable;
@@ -232,6 +233,11 @@ public final class LazyUtils {
out.write(toWrite, 0, toWrite.length);
break;
}
+ case DATE: {
+ LazyDate.writeUTF8(out,
+ ((DateObjectInspector) oi).getPrimitiveWritableObject(o));
+ break;
+ }
case TIMESTAMP: {
LazyTimestamp.writeUTF8(out,
((TimestampObjectInspector) oi).getPrimitiveWritableObject(o));
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java Tue Jul 30 22:22:35 2013
@@ -53,6 +53,8 @@ public final class LazyPrimitiveObjectIn
new LazyDoubleObjectInspector();
public static final LazyVoidObjectInspector LAZY_VOID_OBJECT_INSPECTOR =
new LazyVoidObjectInspector();
+ public static final LazyDateObjectInspector LAZY_DATE_OBJECT_INSPECTOR =
+ new LazyDateObjectInspector();
public static final LazyTimestampObjectInspector LAZY_TIMESTAMP_OBJECT_INSPECTOR =
new LazyTimestampObjectInspector();
public static final LazyBinaryObjectInspector LAZY_BINARY_OBJECT_INSPECTOR =
@@ -101,6 +103,8 @@ public final class LazyPrimitiveObjectIn
return LAZY_BINARY_OBJECT_INSPECTOR;
case VOID:
return LAZY_VOID_OBJECT_INSPECTOR;
+ case DATE:
+ return LAZY_DATE_OBJECT_INSPECTOR;
case TIMESTAMP:
return LAZY_TIMESTAMP_OBJECT_INSPECTOR;
case DECIMAL:
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java Tue Jul 30 22:22:35 2013
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableVoidObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -72,6 +73,8 @@ public final class LazyBinaryFactory {
return new LazyBinaryString((WritableStringObjectInspector) oi);
case VOID: // for NULL
return new LazyBinaryVoid((WritableVoidObjectInspector) oi);
+ case DATE:
+ return new LazyBinaryDate((WritableDateObjectInspector) oi);
case TIMESTAMP:
return new LazyBinaryTimestamp((WritableTimestampObjectInspector) oi);
case BINARY:
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java Tue Jul 30 22:22:35 2013
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.serde2.Byt
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
@@ -43,12 +44,13 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
@@ -378,6 +380,11 @@ public class LazyBinarySerDe extends Abs
return warnedOnceNullMapKey;
}
+ case DATE: {
+ DateWritable d = ((DateObjectInspector) poi).getPrimitiveWritableObject(obj);
+ d.writeToByteStream(byteStream);
+ return warnedOnceNullMapKey;
+ }
case TIMESTAMP: {
TimestampObjectInspector toi = (TimestampObjectInspector) poi;
TimestampWritable t = toi.getPrimitiveWritableObject(obj);
@@ -557,6 +564,7 @@ public class LazyBinarySerDe extends Abs
* Returns the statistics after (de)serialization)
*/
+ @Override
public SerDeStats getSerDeStats() {
// must be different
assert (lastOperationSerialize != lastOperationDeserialize);
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java Tue Jul 30 22:22:35 2013
@@ -196,12 +196,13 @@ public final class LazyBinaryUtils {
recordInfo.elementOffset = vInt.length;
recordInfo.elementSize = vInt.value;
break;
+ case DATE:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
+ break;
case TIMESTAMP:
recordInfo.elementOffset = 0;
- recordInfo.elementSize = 4;
- if(TimestampWritable.hasDecimal(bytes[offset])) {
- recordInfo.elementSize += (byte) WritableUtils.decodeVIntSize(bytes[offset+4]);
- }
+ recordInfo.elementSize = TimestampWritable.getTotalLength(bytes, offset);
break;
case DECIMAL:
// using vint instead of 4 bytes
@@ -281,6 +282,13 @@ public final class LazyBinaryUtils {
public byte length;
};
+ public static final ThreadLocal<VInt> threadLocalVInt = new ThreadLocal<VInt>() {
+ @Override
+ protected VInt initialValue() {
+ return new VInt();
+ }
+ };
+
/**
* Reads a zero-compressed encoded int from a byte array and returns it.
*
@@ -320,6 +328,28 @@ public final class LazyBinaryUtils {
}
/**
+ * Read a zero-compressed encoded long from a byte array.
+ *
+ * @param bytes the byte array
+ * @param offset the offset in the byte array where the VLong is stored
+ * @return the long
+ */
+ public static long readVLongFromByteArray(final byte[] bytes, int offset) {
+ byte firstByte = bytes[offset++];
+ int len = WritableUtils.decodeVIntSize(firstByte);
+ if (len == 1) {
+ return firstByte;
+ }
+ long i = 0;
+ for (int idx = 0; idx < len-1; idx++) {
+ byte b = bytes[offset++];
+ i = i << 8;
+ i = i | (b & 0xFF);
+ }
+ return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
+ }
+
+ /**
* Write a zero-compressed encoded long to a byte array.
*
* @param bytes
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java Tue Jul 30 22:22:35 2013
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
@@ -100,6 +101,10 @@ public final class ObjectInspectorConver
return new PrimitiveObjectInspectorConverter.StringConverter(
inputOI);
}
+ case DATE:
+ return new PrimitiveObjectInspectorConverter.DateConverter(
+ inputOI,
+ (SettableDateObjectInspector) outputOI);
case TIMESTAMP:
return new PrimitiveObjectInspectorConverter.TimestampConverter(
inputOI,
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java Tue Jul 30 22:22:35 2013
@@ -30,6 +30,7 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -38,6 +39,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
@@ -488,6 +490,8 @@ public final class ObjectInspectorUtils
case BINARY:
return ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
+ case DATE:
+ return ((DateObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
case TIMESTAMP:
TimestampWritable t = ((TimestampObjectInspector) poi)
.getPrimitiveWritableObject(o);
@@ -684,6 +688,13 @@ public final class ObjectInspectorUtils
return bw1.compareTo(bw2);
}
+ case DATE: {
+ DateWritable d1 = ((DateObjectInspector) poi1)
+ .getPrimitiveWritableObject(o1);
+ DateWritable d2 = ((DateObjectInspector) poi2)
+ .getPrimitiveWritableObject(o2);
+ return d1.compareTo(d2);
+ }
case TIMESTAMP: {
TimestampWritable t1 = ((TimestampObjectInspector) poi1)
.getPrimitiveWritableObject(o1);
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java Tue Jul 30 22:22:35 2013
@@ -27,7 +27,8 @@ public interface PrimitiveObjectInspecto
* The primitive types supported by Hive.
*/
public static enum PrimitiveCategory {
- VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP, BINARY, DECIMAL, UNKNOWN
+ VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING,
+ DATE, TIMESTAMP, BINARY, DECIMAL, UNKNOWN
};
/**
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java Tue Jul 30 22:22:35 2013
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.serde2.objectinspector.primitive;
+import java.sql.Date;
import java.sql.Timestamp;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -237,6 +238,27 @@ public class PrimitiveObjectInspectorCon
}
}
+ public static class DateConverter implements Converter {
+ PrimitiveObjectInspector inputOI;
+ SettableDateObjectInspector outputOI;
+ Object r;
+
+ public DateConverter(PrimitiveObjectInspector inputOI,
+ SettableDateObjectInspector outputOI) {
+ this.inputOI = inputOI;
+ this.outputOI = outputOI;
+ r = outputOI.create(new Date(0));
+ }
+
+ public Object convert(Object input) {
+ if (input == null) {
+ return null;
+ }
+ return outputOI.set(r, PrimitiveObjectInspectorUtils.getDate(input,
+ inputOI));
+ }
+ }
+
public static class TimestampConverter implements Converter {
PrimitiveObjectInspector inputOI;
SettableTimestampObjectInspector outputOI;
@@ -276,7 +298,7 @@ public class PrimitiveObjectInspectorCon
if (input == null) {
return null;
}
-
+
try {
return outputOI.set(r, PrimitiveObjectInspectorUtils.getHiveDecimal(input,
inputOI));
@@ -368,6 +390,9 @@ public class PrimitiveObjectInspectorCon
t.set(((StringObjectInspector) inputOI).getPrimitiveJavaObject(input));
}
return t;
+ case DATE:
+ t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
+ return t;
case TIMESTAMP:
t.set(((TimestampObjectInspector) inputOI)
.getPrimitiveWritableObject(input).toString());
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java Tue Jul 30 22:22:35 2013
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.serde2.io.
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -66,6 +67,8 @@ public final class PrimitiveObjectInspec
new JavaStringObjectInspector();
public static final JavaVoidObjectInspector javaVoidObjectInspector =
new JavaVoidObjectInspector();
+ public static final JavaDateObjectInspector javaDateObjectInspector =
+ new JavaDateObjectInspector();
public static final JavaTimestampObjectInspector javaTimestampObjectInspector =
new JavaTimestampObjectInspector();
public static final JavaBinaryObjectInspector javaByteArrayObjectInspector =
@@ -91,6 +94,8 @@ public final class PrimitiveObjectInspec
new WritableStringObjectInspector();
public static final WritableVoidObjectInspector writableVoidObjectInspector =
new WritableVoidObjectInspector();
+ public static final WritableDateObjectInspector writableDateObjectInspector =
+ new WritableDateObjectInspector();
public static final WritableTimestampObjectInspector writableTimestampObjectInspector =
new WritableTimestampObjectInspector();
public static final WritableBinaryObjectInspector writableBinaryObjectInspector =
@@ -119,6 +124,8 @@ public final class PrimitiveObjectInspec
writableStringObjectInspector);
cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.VOID,
writableVoidObjectInspector);
+ cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.DATE,
+ writableDateObjectInspector);
cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.TIMESTAMP,
writableTimestampObjectInspector);
cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.BINARY,
@@ -148,6 +155,8 @@ public final class PrimitiveObjectInspec
javaStringObjectInspector);
cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.VOID,
javaVoidObjectInspector);
+ cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.DATE,
+ javaDateObjectInspector);
cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.TIMESTAMP,
javaTimestampObjectInspector);
cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.BINARY,
@@ -198,6 +207,8 @@ public final class PrimitiveObjectInspec
return new WritableConstantDoubleObjectInspector((DoubleWritable)value);
case STRING:
return new WritableConstantStringObjectInspector((Text)value);
+ case DATE:
+ return new WritableConstantDateObjectInspector((DateWritable)value);
case TIMESTAMP:
return new WritableConstantTimestampObjectInspector((TimestampWritable)value);
case DECIMAL:
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java Tue Jul 30 22:22:35 2013
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.serde2.ob
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.sql.Date;
import java.sql.Timestamp;
import java.util.HashMap;
import java.util.Map;
@@ -28,6 +29,7 @@ import java.util.Map;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
@@ -177,7 +179,9 @@ public final class PrimitiveObjectInspec
public static final PrimitiveTypeEntry shortTypeEntry = new PrimitiveTypeEntry(
PrimitiveCategory.SHORT, serdeConstants.SMALLINT_TYPE_NAME, Short.TYPE,
Short.class, ShortWritable.class);
-
+ public static final PrimitiveTypeEntry dateTypeEntry = new PrimitiveTypeEntry(
+ PrimitiveCategory.DATE, serdeConstants.DATE_TYPE_NAME, null,
+ Date.class, DateWritable.class);
public static final PrimitiveTypeEntry timestampTypeEntry = new PrimitiveTypeEntry(
PrimitiveCategory.TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME, null,
Timestamp.class, TimestampWritable.class);
@@ -200,6 +204,7 @@ public final class PrimitiveObjectInspec
registerType(doubleTypeEntry);
registerType(byteTypeEntry);
registerType(shortTypeEntry);
+ registerType(dateTypeEntry);
registerType(timestampTypeEntry);
registerType(decimalTypeEntry);
registerType(unknownTypeEntry);
@@ -361,6 +366,10 @@ public final class PrimitiveObjectInspec
.getPrimitiveWritableObject(o2);
return t1.equals(t2);
}
+ case DATE: {
+ return ((DateObjectInspector) oi1).getPrimitiveWritableObject(o1)
+ .equals(((DateObjectInspector) oi2).getPrimitiveWritableObject(o2));
+ }
case TIMESTAMP: {
return ((TimestampObjectInspector) oi1).getPrimitiveWritableObject(o1)
.equals(((TimestampObjectInspector) oi2).getPrimitiveWritableObject(o2));
@@ -404,6 +413,7 @@ public final class PrimitiveObjectInspec
.getDouble();
case DECIMAL:
return ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o).doubleValue();
+ case DATE: // unsupported conversion
default:
throw new NumberFormatException();
}
@@ -482,8 +492,9 @@ public final class PrimitiveObjectInspec
result = HiveDecimal.ZERO.compareTo(
((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o)) != 0;
break;
+ case DATE: // unsupported conversion
default:
- throw new RuntimeException("Hive 2 Internal error: unknown type: "
+ throw new RuntimeException("Hive 2 Internal error: unsupported conversion from type: "
+ oi.getTypeName());
}
return result;
@@ -566,8 +577,9 @@ public final class PrimitiveObjectInspec
result = ((HiveDecimalObjectInspector) oi)
.getPrimitiveJavaObject(o).intValue();
break;
+ case DATE: // unsupported conversion
default: {
- throw new RuntimeException("Hive 2 Internal error: unknown type: "
+ throw new RuntimeException("Hive 2 Internal error: unsupported conversion from type: "
+ oi.getTypeName());
}
}
@@ -624,8 +636,9 @@ public final class PrimitiveObjectInspec
result = ((HiveDecimalObjectInspector) oi)
.getPrimitiveJavaObject(o).longValue();
break;
+ case DATE: // unsupported conversion
default:
- throw new RuntimeException("Hive 2 Internal error: unknown type: "
+ throw new RuntimeException("Hive 2 Internal error: unsupported conversion from type: "
+ oi.getTypeName());
}
return result;
@@ -675,8 +688,9 @@ public final class PrimitiveObjectInspec
result = ((HiveDecimalObjectInspector) oi)
.getPrimitiveJavaObject(o).doubleValue();
break;
+ case DATE: // unsupported conversion
default:
- throw new RuntimeException("Hive 2 Internal error: unknown type: "
+ throw new RuntimeException("Hive 2 Internal error: unsupported conversion from type: "
+ oi.getTypeName());
}
return result;
@@ -732,6 +746,9 @@ public final class PrimitiveObjectInspec
StringObjectInspector soi = (StringObjectInspector) oi;
result = soi.getPrimitiveJavaObject(o);
break;
+ case DATE:
+ result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).toString();
+ break;
case TIMESTAMP:
result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).toString();
break;
@@ -817,13 +834,48 @@ public final class PrimitiveObjectInspec
case DECIMAL:
result = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
break;
+ case DATE: // unsupported conversion
default:
- throw new RuntimeException("Hive 2 Internal error: unknown type: "
+ throw new RuntimeException("Hive 2 Internal error: unsupported conversion from type: "
+ oi.getTypeName());
}
return result;
}
+ public static Date getDate(Object o, PrimitiveObjectInspector oi) {
+ if (o == null) {
+ return null;
+ }
+
+ Date result = null;
+ switch (oi.getPrimitiveCategory()) {
+ case VOID:
+ result = null;
+ break;
+ case STRING:
+ StringObjectInspector soi = (StringObjectInspector) oi;
+ String s = soi.getPrimitiveJavaObject(o).trim();
+ try {
+ result = Date.valueOf(s);
+ } catch (IllegalArgumentException e) {
+ result = null;
+ }
+ break;
+ case DATE:
+ result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).get();
+ break;
+ case TIMESTAMP:
+ result = DateWritable.timeToDate(
+ ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).getSeconds());
+ break;
+ default:
+ throw new RuntimeException("Cannot convert to Date from: "
+ + oi.getTypeName());
+ }
+
+ return result;
+ }
+
public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) {
if (o == null) {
return null;
@@ -876,6 +928,10 @@ public final class PrimitiveObjectInspec
result = null;
}
break;
+ case DATE:
+ result = new Timestamp(
+ ((DateObjectInspector) oi).getPrimitiveWritableObject(o).get().getTime());
+ break;
case TIMESTAMP:
result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).getTimestamp();
break;
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java Tue Jul 30 22:22:35 2013
@@ -62,6 +62,7 @@ public final class TypeInfoFactory {
public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME);
public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME);
public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME);
+ public static final TypeInfo dateTypeInfo = getPrimitiveTypeInfo(serdeConstants.DATE_TYPE_NAME);
public static final TypeInfo timestampTypeInfo = getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME);
public static final TypeInfo binaryTypeInfo = getPrimitiveTypeInfo(serdeConstants.BINARY_TYPE_NAME);
public static final TypeInfo decimalTypeInfo = getPrimitiveTypeInfo(serdeConstants.DECIMAL_TYPE_NAME);
Modified: hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java (original)
+++ hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java Tue Jul 30 22:22:35 2013
@@ -18,7 +18,7 @@
package org.apache.hadoop.hive.serde2;
-import java.math.BigInteger;
+import java.sql.Date;
import java.util.List;
import java.util.Properties;
import java.util.Random;
@@ -114,13 +114,14 @@ public class TestStatsSerde extends Test
Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
String st = randField > 6 ? null : TestBinarySortableSerDe
.getRandString(r);
- HiveDecimal bd = randField > 8 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r);
+ HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r);
+ Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r);
MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r
.nextInt(5) - 2, r.nextInt(5) - 2);
List<Integer> li = randField > 10 ? null : TestBinarySortableSerDe
.getRandIntegerArray(r);
byte[] ba = TestBinarySortableSerDe.getRandBA(r, i);
- MyTestClass t = new MyTestClass(b, s, n, l, f, d, st, bd, is, li,ba);
+ MyTestClass t = new MyTestClass(b, s, n, l, f, d, st, bd, date, is, li,ba);
rows[i] = t;
}
Modified: hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java (original)
+++ hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java Tue Jul 30 22:22:35 2013
@@ -17,6 +17,18 @@
*/
package org.apache.hadoop.hive.serde2.avro;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -30,18 +42,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.junit.Test;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.Map;
-import java.util.HashMap;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
public class TestAvroDeserializer {
private final GenericData GENERIC_DATA = GenericData.get();
@@ -338,12 +338,12 @@ public class TestAvroDeserializer {
ArrayList<Object> row =
(ArrayList<Object>)de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
assertEquals(1, row.size());
- Object theArrayObject = row.get(0);
- assertTrue(theArrayObject instanceof List);
- List theList = (List)theArrayObject;
+ Object byteObject = row.get(0);
+ assertTrue(byteObject instanceof byte[]);
+ byte[] outBytes = (byte[]) byteObject;
// Verify the raw object that's been created
for(int i = 0; i < bytes.length; i++) {
- assertEquals(bytes[i], theList.get(i));
+ assertEquals(bytes[i], outBytes[i]);
}
// Now go the correct way, through objectinspectors
@@ -352,9 +352,9 @@ public class TestAvroDeserializer {
assertEquals(1, fieldsDataAsList.size());
StructField fieldRef = oi.getStructFieldRef("hash");
- List theList2 = (List)oi.getStructFieldData(row, fieldRef);
- for(int i = 0; i < bytes.length; i++) {
- assertEquals(bytes[i], theList2.get(i));
+ outBytes = (byte[]) oi.getStructFieldData(row, fieldRef);
+ for(int i = 0; i < outBytes.length; i++) {
+ assertEquals(bytes[i], outBytes[i]);
}
}
@@ -377,8 +377,13 @@ public class TestAvroDeserializer {
ArrayList<Object> row =
(ArrayList<Object>)de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
assertEquals(1, row.size());
- Object theArrayObject = row.get(0);
- assertTrue(theArrayObject instanceof List);
+ Object byteObject = row.get(0);
+ assertTrue(byteObject instanceof byte[]);
+ byte[] outBytes = (byte[]) byteObject;
+ // Verify the raw object that's been created
+ for(int i = 0; i < bytes.length; i++) {
+ assertEquals(bytes[i], outBytes[i]);
+ }
// Now go the correct way, through objectinspectors
StandardStructObjectInspector oi = (StandardStructObjectInspector)aoig.getObjectInspector();
@@ -386,9 +391,9 @@ public class TestAvroDeserializer {
assertEquals(1, fieldsDataAsList.size());
StructField fieldRef = oi.getStructFieldRef("bytesField");
- List theList2 = (List)oi.getStructFieldData(row, fieldRef);
- for(int i = 0; i < bytes.length; i++) {
- assertEquals(bytes[i], theList2.get(i));
+ outBytes = (byte[]) oi.getStructFieldData(row, fieldRef);
+ for(int i = 0; i < outBytes.length; i++) {
+ assertEquals(bytes[i], outBytes[i]);
}
}
@@ -489,9 +494,10 @@ public class TestAvroDeserializer {
ObjectInspector fieldObjectInspector = fieldRef.getFieldObjectInspector();
StringObjectInspector soi = (StringObjectInspector)fieldObjectInspector;
- if(expected == null)
+ if(expected == null) {
assertNull(soi.getPrimitiveJavaObject(rowElement));
- else
+ } else {
assertEquals(expected, soi.getPrimitiveJavaObject(rowElement));
+ }
}
}
Modified: hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java (original)
+++ hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java Tue Jul 30 22:22:35 2013
@@ -17,10 +17,18 @@
*/
package org.apache.hadoop.hive.serde2.avro;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -32,13 +40,6 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.junit.Test;
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
public class TestAvroObjectInspectorGenerator {
private final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo("string");
private final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo("int");
@@ -353,7 +354,7 @@ public class TestAvroObjectInspectorGene
AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
verifyMap(aoig, "aMap");
}
-
+
/**
* Check a given AvroObjectInspectorGenerator to verify that it matches our test
* schema's expected map.
@@ -476,10 +477,8 @@ public class TestAvroObjectInspectorGene
// Column types
assertEquals(1, aoig.getColumnTypes().size());
TypeInfo typeInfo = aoig.getColumnTypes().get(0);
- assertTrue(typeInfo instanceof ListTypeInfo);
- ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
- assertTrue(listTypeInfo.getListElementTypeInfo() instanceof PrimitiveTypeInfo);
- assertEquals("tinyint", listTypeInfo.getListElementTypeInfo().getTypeName());
+ assertTrue(typeInfo instanceof PrimitiveTypeInfo);
+ assertEquals(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), PrimitiveCategory.BINARY);
}
@Test // Avro considers bytes primitive, Hive doesn't. Make them list of tinyint.
@@ -495,10 +494,8 @@ public class TestAvroObjectInspectorGene
// Column types
assertEquals(1, aoig.getColumnTypes().size());
TypeInfo typeInfo = aoig.getColumnTypes().get(0);
- assertTrue(typeInfo instanceof ListTypeInfo);
- ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
- assertTrue(listTypeInfo.getListElementTypeInfo() instanceof PrimitiveTypeInfo);
- assertEquals("tinyint", listTypeInfo.getListElementTypeInfo().getTypeName());
+ assertTrue(typeInfo instanceof PrimitiveTypeInfo);
+ assertEquals(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), PrimitiveCategory.BINARY);
}
@Test // That Union[T, NULL] is converted to just T.
Modified: hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java (original)
+++ hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java Tue Jul 30 22:22:35 2013
@@ -17,10 +17,11 @@
*/
package org.apache.hadoop.hive.serde2.binarysortable;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-
+import java.sql.Date;
import java.util.List;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
public class MyTestClass {
Byte myByte;
Short myShort;
@@ -30,6 +31,7 @@ public class MyTestClass {
Double myDouble;
String myString;
HiveDecimal myDecimal;
+ Date myDate;
MyTestInnerStruct myStruct;
List<Integer> myList;
byte[] myBA;
@@ -38,7 +40,7 @@ public class MyTestClass {
}
public MyTestClass(Byte b, Short s, Integer i, Long l, Float f, Double d,
- String st, HiveDecimal bd, MyTestInnerStruct is, List<Integer> li, byte[] ba) {
+ String st, HiveDecimal bd, Date date, MyTestInnerStruct is, List<Integer> li, byte[] ba) {
myByte = b;
myShort = s;
myInt = i;
@@ -47,6 +49,7 @@ public class MyTestClass {
myDouble = d;
myString = st;
myDecimal = bd;
+ myDate = date;
myStruct = is;
myList = li;
myBA = ba;
Modified: hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java (original)
+++ hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java Tue Jul 30 22:22:35 2013
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.serde2.binarysortable;
+import java.sql.Date;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -153,6 +154,15 @@ public class TestBinarySortableSerDe ext
return bd;
}
+ public static Date getRandDate(Random r) {
+ // Draw year, month, then day; the day is 1..28, valid in every month.
+ int year = 1800 + r.nextInt(500);
+ int month = 1 + r.nextInt(12);
+ int day = 1 + r.nextInt(28);
+ String text = String.format("%d-%02d-%02d", year, month, day);
+ return Date.valueOf(text);
+ }
+
public static String getRandString(Random r) {
return getRandString(r, null, r.nextInt(10));
}
@@ -196,7 +206,7 @@ public class TestBinarySortableSerDe ext
MyTestClass rows[] = new MyTestClass[num];
for (int i = 0; i < num; i++) {
- int randField = r.nextInt(10);
+ int randField = r.nextInt(11);
MyTestClass t = new MyTestClass();
t.myByte = randField > 0 ? null : Byte.valueOf((byte) r.nextInt());
t.myShort = randField > 1 ? null : Short.valueOf((short) r.nextInt());
@@ -208,9 +218,10 @@ public class TestBinarySortableSerDe ext
.valueOf(r.nextDouble() * 10 - 5);
t.myString = randField > 6 ? null : getRandString(r);
t.myDecimal = randField > 7 ? null : getRandHiveDecimal(r);
- t.myStruct = randField > 8 ? null : new MyTestInnerStruct(
+ t.myDate = randField > 8 ? null : getRandDate(r);
+ t.myStruct = randField > 9 ? null : new MyTestInnerStruct(
r.nextInt(5) - 2, r.nextInt(5) - 2);
- t.myList = randField > 9 ? null : getRandIntegerArray(r);
+ t.myList = randField > 10 ? null : getRandIntegerArray(r);
t.myBA = getRandBA(r, i);
rows[i] = t;
}
@@ -224,9 +235,9 @@ public class TestBinarySortableSerDe ext
String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
testBinarySortableSerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes,
- "+++++++++++"), true);
+ "++++++++++++"), true);
testBinarySortableSerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes,
- "-----------"), false);
+ "------------"), false);
System.out.println("Test testTBinarySortableProtocol passed!");
} catch (Throwable e) {
Modified: hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java (original)
+++ hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java Tue Jul 30 22:22:35 2013
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.serde2.lazybinary;
+import java.sql.Date;
import java.util.List;
import java.util.Map;
@@ -36,6 +37,7 @@ public class MyTestClassBigger {
Double myDouble;
String myString;
HiveDecimal myDecimal;
+ Date myDate;
MyTestInnerStruct myStruct;
List<Integer> myList;
byte[] myBA;
@@ -45,7 +47,7 @@ public class MyTestClassBigger {
}
public MyTestClassBigger(Byte b, Short s, Integer i, Long l, Float f,
- Double d, String st, HiveDecimal bd, MyTestInnerStruct is, List<Integer> li,
+ Double d, String st, HiveDecimal bd, Date date, MyTestInnerStruct is, List<Integer> li,
byte[] ba, Map<String, List<MyTestInnerStruct>> mp) {
myByte = b;
myShort = s;
@@ -55,6 +57,7 @@ public class MyTestClassBigger {
myDouble = d;
myString = st;
myDecimal = bd;
+ myDate = date;
myStruct = is;
myList = li;
myBA = ba;
Modified: hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java?rev=1508669&r1=1508668&r2=1508669&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java (original)
+++ hive/branches/vectorization/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java Tue Jul 30 22:22:35 2013
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.serde2.lazybinary;
+import java.sql.Date;
+
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
@@ -29,13 +31,14 @@ public class MyTestClassSmaller {
Double myDouble;
String myString;
HiveDecimal myDecimal;
+ Date myDate;
MyTestInnerStruct myStruct;
public MyTestClassSmaller() {
}
-
+
public MyTestClassSmaller(Byte b, Short s, Integer i, Long l, Float f,
- Double d, String st, HiveDecimal bd, MyTestInnerStruct is) {
+ Double d, String st, HiveDecimal bd, Date date, MyTestInnerStruct is) {
myByte = b;
myShort = s;
myInt = i;
@@ -44,6 +47,7 @@ public class MyTestClassSmaller {
myDouble = d;
myString = st;
myDecimal = bd;
+ myDate = date;
myStruct = is;
}
}