Posted to commits@hive.apache.org by cw...@apache.org on 2018/08/29 20:51:27 UTC
[1/2] hive git commit: HIVE-20225: SerDe to support Teradata Binary
Format (Lu Li via cws)
Repository: hive
Updated Branches:
refs/heads/master cf5486dd3 -> b8d82844b
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinarySerde.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinarySerde.java b/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinarySerde.java
new file mode 100644
index 0000000..ccf5f44
--- /dev/null
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinarySerde.java
@@ -0,0 +1,597 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.teradata;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeSpec;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.hive.common.type.Date;
+
+import javax.annotation.Nullable;
+import java.io.ByteArrayInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import static java.lang.String.format;
+
+/**
+ * https://cwiki.apache.org/confluence/display/Hive/TeradataBinarySerde.
+ * TeradataBinarySerde handles the serialization and deserialization of Teradata Binary Record
+ * passed from TeradataBinaryRecordReader.
+ *
+ * The Teradata Binary Record uses little-endian byte order for SHORT, INT, LONG, DOUBLE...
+ * We extend SwappedDataInputStream to handle these types, and add handling for the
+ * Teradata-specific types like VARCHAR, CHAR, TIMESTAMP, DATE...
+ *
+ * Currently we support 11 Teradata data types: VARCHAR, INTEGER, TIMESTAMP, FLOAT, DATE,
+ * BYTEINT, BIGINT, CHAR, DECIMAL, SMALLINT, VARBYTE.
+ * The mapping between Teradata data type and Hive data type is
+ * Teradata Data Type: Hive Data Type
+ * VARCHAR: VARCHAR,
+ * INTEGER: INT,
+ * TIMESTAMP: TIMESTAMP,
+ * FLOAT: DOUBLE,
+ * DATE: DATE,
+ * BYTEINT: TINYINT,
+ * BIGINT: BIGINT,
+ * CHAR: CHAR,
+ * DECIMAL: DECIMAL,
+ * SMALLINT: SMALLINT,
+ * VARBYTE: BINARY.
+ *
+ * TeradataBinarySerde currently doesn't support complex types like MAP, ARRAY and STRUCT.
+ */
+@SerDeSpec(schemaProps = { serdeConstants.LIST_COLUMNS,
+ serdeConstants.LIST_COLUMN_TYPES }) public class TeradataBinarySerde extends AbstractSerDe {
+ private static final Log LOG = LogFactory.getLog(TeradataBinarySerde.class);
+
+ public static final String TD_SCHEMA_LITERAL = "teradata.schema.literal";
+
+ private StructObjectInspector rowOI;
+ private ArrayList<Object> row;
+ private byte[] inForNull;
+
+ private int numCols;
+ private List<String> columnNames;
+ private List<TypeInfo> columnTypes;
+
+ private TeradataBinaryDataOutputStream out;
+ private BytesWritable serializeBytesWritable;
+ private byte[] outForNull;
+
+ public static final String TD_TIMESTAMP_PRECISION = "teradata.timestamp.precision";
+ private int timestampPrecision;
+ private static final int DEFAULT_TIMESTAMP_BYTE_NUM = 19;
+ private static final String DEFAULT_TIMESTAMP_PRECISION = "6";
+
+ public static final String TD_CHAR_SET = "teradata.char.charset";
+ private String charCharset;
+ private static final String DEFAULT_CHAR_CHARSET = "UNICODE";
+ private static final Map<String, Integer> CHARSET_TO_BYTE_NUM = ImmutableMap.of("LATIN", 2, "UNICODE", 3);
+
+ /**
+ * Initialize the HiveSerializer.
+ *
+ * @param conf
+ * system properties; can be null at compile time
+ * @param tbl
+ * table properties
+ * @throws SerDeException
+ */
+ @Override public void initialize(@Nullable Configuration conf, Properties tbl) throws SerDeException {
+ columnNames = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(","));
+
+ String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
+ LOG.debug(serdeConstants.LIST_COLUMN_TYPES + ": " + columnTypeProperty);
+ if (columnTypeProperty.length() == 0) {
+ columnTypes = new ArrayList<TypeInfo>();
+ } else {
+ columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+
+ assert columnNames.size() == columnTypes.size();
+ numCols = columnNames.size();
+
+ // get the configured Teradata timestamp precision
+ // the timestamp precision in the binary files generated by TPT/BTEQ is configurable
+ timestampPrecision = Integer.parseInt(tbl.getProperty(TD_TIMESTAMP_PRECISION, DEFAULT_TIMESTAMP_PRECISION));
+
+ // get the configured Teradata char charset
+ // in Teradata, the LATIN charset uses 2 bytes per char and UNICODE uses 3 bytes per char
+ charCharset = tbl.getProperty(TD_CHAR_SET, DEFAULT_CHAR_CHARSET);
+ if (!CHARSET_TO_BYTE_NUM.containsKey(charCharset)) {
+ throw new SerDeException(
+ format("%s isn't a supported Teradata char charset; the supported charsets are %s", charCharset, CHARSET_TO_BYTE_NUM.keySet()));
+ }
+
+ // All columns have to be primitive.
+ // Constructing the row ObjectInspector:
+ List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(numCols);
+ for (int i = 0; i < numCols; i++) {
+ if (columnTypes.get(i).getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new SerDeException(
+ getClass().getName() + " only accepts primitive columns, but column[" + i + "] named " + columnNames.get(i)
+ + " has category " + columnTypes.get(i).getCategory());
+ }
+ columnOIs.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(columnTypes.get(i)));
+ }
+
+ rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
+
+ // Construct the row object; it will be reused for all rows
+ row = new ArrayList<Object>(numCols);
+ for (int i = 0; i < numCols; i++) {
+ row.add(null);
+ }
+
+ // Initialize vars related to Null Array which represents the null bitmap
+ int byteNumForNullArray = (numCols / 8) + ((numCols % 8 == 0) ? 0 : 1);
+ LOG.debug(format("The Null Bytes for each record will have %s bytes", byteNumForNullArray));
+ inForNull = new byte[byteNumForNullArray];
+
+ out = new TeradataBinaryDataOutputStream();
+ serializeBytesWritable = new BytesWritable();
+ outForNull = new byte[byteNumForNullArray];
+ }
+
+ /**
+ * Returns the Writable class that would be returned by the serialize method.
+ * This is used to initialize SequenceFile header.
+ */
+ @Override public Class<? extends Writable> getSerializedClass() {
+ return BytesWritable.class;
+ }
+
+ /**
+ * Serialize an object by navigating inside the Object with the
+ * ObjectInspector. In most cases, the return value of this function will be
+ * constant since the function will reuse the Writable object. If the client
+ * wants to keep a copy of the Writable, the client needs to clone the
+ * returned value.
+ *
+ * @param obj
+ * @param objInspector
+ */
+ @Override public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+ try {
+ out.reset();
+ final StructObjectInspector outputRowOI = (StructObjectInspector) objInspector;
+ final List<? extends StructField> fieldRefs = outputRowOI.getAllStructFieldRefs();
+
+ if (fieldRefs.size() != numCols) {
+ throw new SerDeException(
+ "Cannot serialize the object because there are " + fieldRefs.size() + " fieldRefs but the table defined "
+ + numCols + " columns.");
+ }
+
+ // Fully refresh the null array and write it into the output
+ for (int i = 0; i < numCols; i++) {
+ Object objectForField = outputRowOI.getStructFieldData(obj, fieldRefs.get(i));
+ if (objectForField == null) {
+ outForNull[i / 8] = (byte) (outForNull[i / 8] | (0x01 << (7 - (i % 8))));
+ } else {
+ outForNull[i / 8] = (byte) (outForNull[i / 8] & ~(0x01 << (7 - (i % 8))));
+ }
+ }
+ out.write(outForNull);
+
+ // serialize each field using FieldObjectInspector
+ for (int i = 0; i < numCols; i++) {
+ Object objectForField = outputRowOI.getStructFieldData(obj, fieldRefs.get(i));
+ serializeField(objectForField, fieldRefs.get(i).getFieldObjectInspector(), columnTypes.get(i));
+ }
+
+ serializeBytesWritable.set(out.toByteArray(), 0, out.size());
+ return serializeBytesWritable;
+ } catch (IOException e) {
+ throw new SerDeException(e);
+ }
+ }
+
+ private void serializeField(Object objectForField, ObjectInspector oi, TypeInfo ti)
+ throws IOException, SerDeException {
+ switch (oi.getCategory()) {
+ case PRIMITIVE:
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+ switch (poi.getPrimitiveCategory()) {
+ // Teradata Type: BYTEINT
+ case BYTE:
+ ByteObjectInspector boi = (ByteObjectInspector) poi;
+ byte b = 0;
+ if (objectForField != null) {
+ b = boi.get(objectForField);
+ }
+ out.write(b);
+ return;
+ // Teradata Type: SMALLINT
+ case SHORT:
+ ShortObjectInspector spoi = (ShortObjectInspector) poi;
+ short s = 0;
+ if (objectForField != null) {
+ s = spoi.get(objectForField);
+ }
+ out.writeShort(s);
+ return;
+ // Teradata Type: INT
+ case INT:
+ IntObjectInspector ioi = (IntObjectInspector) poi;
+ int i = 0;
+ if (objectForField != null) {
+ i = ioi.get(objectForField);
+ }
+ out.writeInt(i);
+ return;
+ // Teradata Type: BIGINT
+ case LONG:
+ LongObjectInspector loi = (LongObjectInspector) poi;
+ long l = 0;
+ if (objectForField != null) {
+ l = loi.get(objectForField);
+ }
+ out.writeLong(l);
+ return;
+ // Teradata Type: FLOAT
+ case DOUBLE:
+ DoubleObjectInspector doi = (DoubleObjectInspector) poi;
+ double d = 0;
+ if (objectForField != null) {
+ d = doi.get(objectForField);
+ }
+ out.writeDouble(d);
+ return;
+ // Teradata Type: VARCHAR
+ case VARCHAR:
+ HiveVarcharObjectInspector hvoi = (HiveVarcharObjectInspector) poi;
+ HiveVarcharWritable hv = hvoi.getPrimitiveWritableObject(objectForField);
+ // assert the length of varchar record fits into the table definition
+ if (hv != null) {
+ assert ((VarcharTypeInfo) ti).getLength() >= hv.getHiveVarchar().getCharacterLength();
+ }
+ out.writeVarChar(hv);
+ return;
+ // Teradata Type: TIMESTAMP
+ case TIMESTAMP:
+ TimestampObjectInspector tsoi = (TimestampObjectInspector) poi;
+ TimestampWritableV2 ts = tsoi.getPrimitiveWritableObject(objectForField);
+ out.writeTimestamp(ts, getTimeStampByteNum(timestampPrecision));
+ return;
+ // Teradata Type: DATE
+ case DATE:
+ DateObjectInspector dtoi = (DateObjectInspector) poi;
+ DateWritableV2 dw = dtoi.getPrimitiveWritableObject(objectForField);
+ out.writeDate(dw);
+ return;
+ // Teradata Type: CHAR
+ case CHAR:
+ HiveCharObjectInspector coi = (HiveCharObjectInspector) poi;
+ HiveCharWritable hc = coi.getPrimitiveWritableObject(objectForField);
+ // assert the length of char record fits into the table definition
+ if (hc != null) {
+ assert ((CharTypeInfo) ti).getLength() >= hc.getHiveChar().getCharacterLength();
+ }
+ out.writeChar(hc, getCharByteNum(charCharset) * ((CharTypeInfo) ti).getLength());
+ return;
+ // Teradata Type: DECIMAL
+ case DECIMAL:
+ DecimalTypeInfo dtype = (DecimalTypeInfo) ti;
+ int precision = dtype.precision();
+ int scale = dtype.scale();
+ HiveDecimalObjectInspector hdoi = (HiveDecimalObjectInspector) poi;
+ HiveDecimalWritable hd = hdoi.getPrimitiveWritableObject(objectForField);
+ // assert the precision of decimal record fits into the table definition
+ if (hd != null) {
+ assert (dtype.getPrecision() >= hd.precision());
+ }
+ out.writeDecimal(hd, getDecimalByteNum(precision), scale);
+ return;
+ // Teradata Type: VARBYTE
+ case BINARY:
+ BinaryObjectInspector bnoi = (BinaryObjectInspector) poi;
+ BytesWritable byw = bnoi.getPrimitiveWritableObject(objectForField);
+ out.writeVarByte(byw);
+ return;
+ default:
+ throw new SerDeException("Unrecognized type: " + poi.getPrimitiveCategory());
+ }
+ // Currently, serialization of complex types is not supported
+ case LIST:
+ case MAP:
+ case STRUCT:
+ default:
+ throw new SerDeException("Unrecognized type: " + oi.getCategory());
+ }
+ }
+
+ @Override public SerDeStats getSerDeStats() {
+ // no support for statistics
+ return null;
+ }
+
+ /**
+ * Deserialize an object out of a Writable blob. In most cases, the return
+ * value of this function will be constant since the function will reuse the
+ * returned object. If the client wants to keep a copy of the object, the
+ * client needs to clone the returned value by calling
+ * ObjectInspectorUtils.getStandardObject().
+ *
+ * @param blob
+ * The Writable object containing a serialized object
+ * @return A Java object representing the contents in the blob.
+ */
+ @Override public Object deserialize(Writable blob) throws SerDeException {
+ try {
+ BytesWritable data = (BytesWritable) blob;
+
+ // initialize the data to be the input stream
+ TeradataBinaryDataInputStream in =
+ new TeradataBinaryDataInputStream(new ByteArrayInputStream(data.getBytes(), 0, data.getLength()));
+
+ int numOfByteRead = in.read(inForNull);
+
+ if (inForNull.length != 0 && numOfByteRead != inForNull.length) {
+ throw new EOFException("not enough bytes for one object");
+ }
+
+ boolean isNull;
+ for (int i = 0; i < numCols; i++) {
+ // get if the ith field is null or not
+ isNull = ((inForNull[i / 8] & (128 >> (i % 8))) != 0);
+ row.set(i, deserializeField(in, columnTypes.get(i), row.get(i), isNull));
+ }
+
+ //After deserializing all the fields, the input should be exhausted, in which case in.read will return -1
+ if (in.read() != -1) {
+ throw new EOFException("The input stream still has data after all the fields were deserialized - this is unexpected");
+ }
+ } catch (EOFException e) {
+ LOG.warn("Catch thrown exception", e);
+ LOG.warn("This record has been polluted. We have reset all the row fields to be null");
+ for (int i = 0; i < numCols; i++) {
+ row.set(i, null);
+ }
+ } catch (IOException e) {
+ throw new SerDeException(e);
+ } catch (ParseException e) {
+ throw new SerDeException(e);
+ }
+ return row;
+ }
+
+ private Object deserializeField(TeradataBinaryDataInputStream in, TypeInfo type, Object reuse, boolean isNull)
+ throws IOException, ParseException, SerDeException {
+ // isNull:
+ // In the Teradata Binary file, even when a field is null (isNull=true),
+ // the data still contains default values to pad the record.
+ // In this case, the bytes must still be read even though they are not used.
+ switch (type.getCategory()) {
+ case PRIMITIVE:
+ PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
+ switch (ptype.getPrimitiveCategory()) {
+ case VARCHAR: // Teradata Type: VARCHAR
+ String st = in.readVarchar();
+ if (isNull) {
+ return null;
+ } else {
+ HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
+ r.set(st, ((VarcharTypeInfo) type).getLength());
+ return r;
+ }
+ case INT: // Teradata Type: INT
+ int i = in.readInt();
+ if (isNull) {
+ return null;
+ } else {
+ IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
+ r.set(i);
+ return r;
+ }
+ case TIMESTAMP: // Teradata Type: TIMESTAMP
+ Timestamp ts = in.readTimestamp(getTimeStampByteNum(timestampPrecision));
+ if (isNull) {
+ return null;
+ } else {
+ TimestampWritableV2 r = reuse == null ? new TimestampWritableV2() : (TimestampWritableV2) reuse;
+ r.set(ts);
+ return r;
+ }
+ case DOUBLE: // Teradata Type: FLOAT
+ double d = in.readDouble();
+ if (isNull) {
+ return null;
+ } else {
+ DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
+ r.set(d);
+ return r;
+ }
+ case DATE: // Teradata Type: DATE
+ Date dt = in.readDate();
+ if (isNull) {
+ return null;
+ } else {
+ DateWritableV2 r = reuse == null ? new DateWritableV2() : (DateWritableV2) reuse;
+ r.set(dt);
+ return r;
+ }
+ case BYTE: // Teradata Type: BYTEINT
+ byte bt = in.readByte();
+ if (isNull) {
+ return null;
+ } else {
+ ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
+ r.set(bt);
+ return r;
+ }
+ case LONG: // Teradata Type: BIGINT
+ long l = in.readLong();
+ if (isNull) {
+ return null;
+ } else {
+ LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
+ r.set(l);
+ return r;
+ }
+ case CHAR: // Teradata Type: CHAR
+ CharTypeInfo ctype = (CharTypeInfo) type;
+ int length = ctype.getLength();
+ String c = in.readChar(length * getCharByteNum(charCharset));
+ if (isNull) {
+ return null;
+ } else {
+ HiveCharWritable r = reuse == null ? new HiveCharWritable() : (HiveCharWritable) reuse;
+ r.set(c, length);
+ return r;
+ }
+ case DECIMAL: // Teradata Type: DECIMAL
+ DecimalTypeInfo dtype = (DecimalTypeInfo) type;
+ int precision = dtype.precision();
+ int scale = dtype.scale();
+ HiveDecimal hd = in.readDecimal(scale, getDecimalByteNum(precision));
+ if (isNull) {
+ return null;
+ } else {
+ HiveDecimalWritable r = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);
+ r.set(hd);
+ return r;
+ }
+ case SHORT: // Teradata Type: SMALLINT
+ short s = in.readShort();
+ if (isNull) {
+ return null;
+ } else {
+ ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
+ r.set(s);
+ return r;
+ }
+ case BINARY: // Teradata Type: VARBYTE
+ byte[] content = in.readVarbyte();
+ if (isNull) {
+ return null;
+ } else {
+ BytesWritable r = new BytesWritable();
+ r.set(content, 0, content.length);
+ return r;
+ }
+ default:
+ throw new SerDeException("Unrecognized type: " + ptype.getPrimitiveCategory());
+ }
+ // Currently, deserialization of complex types is not supported
+ case LIST:
+ case MAP:
+ case STRUCT:
+ default:
+ throw new SerDeException("Unsupported category: " + type.getCategory());
+ }
+ }
+
+ /**
+ * Get the object inspector that can be used to navigate through the internal
+ * structure of the Object returned from deserialize(...).
+ */
+ @Override public ObjectInspector getObjectInspector() throws SerDeException {
+ return rowOI;
+ }
+
+ private int getTimeStampByteNum(int precision) {
+ if (precision == 0) {
+ return DEFAULT_TIMESTAMP_BYTE_NUM;
+ } else {
+ return precision + 1 + DEFAULT_TIMESTAMP_BYTE_NUM;
+ }
+ }
+
+ private int getCharByteNum(String charset) throws SerDeException {
+ if (!CHARSET_TO_BYTE_NUM.containsKey(charset)) {
+ throw new SerDeException(
+ format("%s isn't a supported Teradata char charset; the supported charsets are %s", charset, CHARSET_TO_BYTE_NUM.keySet()));
+ } else {
+ return CHARSET_TO_BYTE_NUM.get(charset);
+ }
+ }
+
+ private int getDecimalByteNum(int precision) throws SerDeException {
+ if (precision <= 0) {
+ throw new SerDeException(format("the precision of Decimal should be bigger than 0. %d is illegal", precision));
+ }
+ if (precision <= 2) {
+ return 1;
+ }
+ if (precision <= 4) {
+ return 2;
+ }
+ if (precision <= 9) {
+ return 4;
+ }
+ if (precision <= 18) {
+ return 8;
+ }
+ if (precision <= 38) {
+ return 16;
+ }
+ throw new IllegalArgumentException(
+ format("the precision of Decimal should be no greater than 38. %d is illegal", precision));
+ }
+}
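A note on the null bitmap used by serialize/deserialize above: each record carries ceil(numCols / 8) leading bytes, one bit per column, most significant bit first within each byte. The standalone sketch below mirrors that bit arithmetic; the class and method names are ours, for illustration only, and are not part of the patch.

    import java.util.Arrays;

    public class NullBitmapSketch {
      // One bit per column, most significant bit first within each byte,
      // mirroring the masks (0x01 << (7 - i % 8)) and (128 >> i % 8) above.
      static byte[] encodeNullBitmap(Object[] row) {
        byte[] bitmap = new byte[(row.length / 8) + ((row.length % 8 == 0) ? 0 : 1)];
        for (int i = 0; i < row.length; i++) {
          if (row[i] == null) {
            bitmap[i / 8] |= (0x01 << (7 - (i % 8)));
          }
        }
        return bitmap;
      }

      static boolean isNull(byte[] bitmap, int i) {
        return (bitmap[i / 8] & (128 >> (i % 8))) != 0;
      }

      public static void main(String[] args) {
        Object[] row = { "a", null, "c", null, null, 1, 2, 3, null };
        byte[] bitmap = encodeNullBitmap(row); // 9 columns -> 2 bytes
        System.out.println(Arrays.toString(bitmap)); // [88, -128], i.e. 0x58 0x80
        System.out.println(isNull(bitmap, 1)); // true
      }
    }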
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDate.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDate.java b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDate.java
new file mode 100644
index 0000000..af81fe3
--- /dev/null
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDate.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.teradata;
+
+import com.google.common.io.BaseEncoding;
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.io.BytesWritable;
+import org.junit.Assert;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Test the data type DATE for Teradata binary format.
+ */
+public class TestTeradataBinarySerdeForDate extends TestCase {
+
+ private final TeradataBinarySerde serde = new TeradataBinarySerde();
+ private final Properties props = new Properties();
+
+ protected void setUp() throws Exception {
+ props.setProperty(serdeConstants.LIST_COLUMNS, "TD_DATE");
+ props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "date");
+ serde.initialize(null, props);
+ }
+
+ public void testDateBefore1900() throws Exception {
+
+ //0060-01-01
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("00653de7fe"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Date ts = ((DateWritableV2) row.get(0)).get();
+ Assert.assertEquals(ts.getYear(), 60);
+ Assert.assertEquals(ts.getMonth(), 1);
+ Assert.assertEquals(ts.getDay(), 1);
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testDateAfter1900() throws Exception {
+
+ //9999-01-01
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("0095cfd304"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Date ts = ((DateWritableV2) row.get(0)).get();
+ Assert.assertEquals(ts.getYear(), 9999);
+ Assert.assertEquals(ts.getMonth(), 1);
+ Assert.assertEquals(ts.getDay(), 1);
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+}
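Both fixtures above are consistent with DATE being stored as a little-endian 32-bit signed integer equal to (year - 1900) * 10000 + month * 100 + day, preceded by the one-byte null bitmap; years before 1900 yield a negative integer, which is why the first fixture decodes to year 0060. A quick independent check (not part of the patch):

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    public class TeradataDateCheck {
      // Encode a date as (year - 1900) * 10000 + month * 100 + day,
      // written as a little-endian 32-bit integer.
      static byte[] encode(int year, int month, int day) {
        int v = (year - 1900) * 10000 + month * 100 + day;
        return ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putInt(v).array();
      }

      public static void main(String[] args) {
        print(encode(60, 1, 1));   // 65 3d e7 fe -> matches "653de7fe" above
        print(encode(9999, 1, 1)); // 95 cf d3 04 -> matches "95cfd304" above
      }

      static void print(byte[] bytes) {
        for (byte b : bytes) {
          System.out.printf("%02x ", b);
        }
        System.out.println();
      }
    }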
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDecimal.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDecimal.java b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDecimal.java
new file mode 100644
index 0000000..6abdd3f
--- /dev/null
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForDecimal.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.teradata;
+
+import com.google.common.io.BaseEncoding;
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.junit.Assert;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Test the data type DECIMAL for Teradata binary format.
+ */
+public class TestTeradataBinarySerdeForDecimal extends TestCase {
+
+ private final TeradataBinarySerde serde = new TeradataBinarySerde();
+ private final Properties props = new Properties();
+
+ protected void setUp() throws Exception {
+ props.setProperty(serdeConstants.LIST_COLUMNS, "TD_DECIMAL");
+ props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "decimal(9,5)");
+
+ serde.initialize(null, props);
+ }
+
+ public void testPositiveFraction() throws Exception {
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("0064000000"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Assert.assertTrue("0.001".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString()));
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testNegativeFraction() throws Exception {
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("009cffffff"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Assert.assertTrue("-0.001".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString()));
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testPositiveNumber1() throws Exception {
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("00a0860100"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Assert.assertTrue("1".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString()));
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testNegativeNumber1() throws Exception {
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("006079feff"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Assert.assertTrue("-1".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString()));
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testPositiveNumber2() throws Exception {
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("0080969800"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Assert.assertTrue("100".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString()));
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testNegativeNumber2() throws Exception {
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode("000065c4e0"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Assert.assertTrue("-5240".equals(((HiveDecimalWritable) row.get(0)).getHiveDecimal().toString()));
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+}
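These DECIMAL fixtures follow the byte widths from getDecimalByteNum (precision 9 maps to 4 payload bytes) and store the unscaled value as a little-endian two's-complement integer. For instance, "0064000000" is one null-bitmap byte followed by 100, which at scale 5 is 0.00100; HiveDecimal then trims trailing zeros, which is why the test expects "0.001". A minimal decoding sketch, written independently of the patch:

    import java.math.BigDecimal;
    import java.math.BigInteger;

    public class TeradataDecimalCheck {
      // Decode a little-endian two's-complement unscaled integer at the given scale.
      static BigDecimal decode(byte[] littleEndian, int scale) {
        byte[] bigEndian = new byte[littleEndian.length];
        for (int i = 0; i < littleEndian.length; i++) {
          bigEndian[i] = littleEndian[littleEndian.length - 1 - i];
        }
        return new BigDecimal(new BigInteger(bigEndian), scale);
      }

      public static void main(String[] args) {
        // payload of "0064000000" after the null-bitmap byte
        System.out.println(decode(new byte[] { 0x64, 0x00, 0x00, 0x00 }, 5)); // 0.00100
        // payload of "009cffffff" after the null-bitmap byte (-100)
        System.out.println(decode(
            new byte[] { (byte) 0x9c, (byte) 0xff, (byte) 0xff, (byte) 0xff }, 5)); // -0.00100
      }
    }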
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForTimeStamp.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForTimeStamp.java b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForTimeStamp.java
new file mode 100644
index 0000000..a6cf2c1
--- /dev/null
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeForTimeStamp.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.teradata;
+
+import com.google.common.io.BaseEncoding;
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.io.BytesWritable;
+import org.junit.Assert;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Test the data type TIMESTAMP for Teradata binary format.
+ */
+public class TestTeradataBinarySerdeForTimeStamp extends TestCase {
+
+ private final TeradataBinarySerde serde = new TeradataBinarySerde();
+ private final Properties props = new Properties();
+
+ protected void setUp() throws Exception {
+ props.setProperty(serdeConstants.LIST_COLUMNS, "TD_TIMESTAMP");
+ props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "timestamp");
+ }
+
+ public void testTimestampPrecision6() throws Exception {
+ props.setProperty(TeradataBinarySerde.TD_TIMESTAMP_PRECISION, "6");
+ serde.initialize(null, props);
+
+ //2012-10-01 12:00:00.110000
+ BytesWritable in = new BytesWritable(
+ BaseEncoding.base16().lowerCase().decode("00323031322d31302d30312031323a30303a30302e313130303030"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Timestamp ts = ((TimestampWritableV2) row.get(0)).getTimestamp();
+ Assert.assertEquals(ts.getYear(), 2012);
+ Assert.assertEquals(ts.getMonth(), 10);
+ Assert.assertEquals(ts.getDay(), 1);
+ Assert.assertEquals(ts.getHours(), 12);
+ Assert.assertEquals(ts.getMinutes(), 0);
+ Assert.assertEquals(ts.getSeconds(), 0);
+ Assert.assertEquals(ts.getNanos(), 110000000);
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testTimestampPrecision0() throws Exception {
+ props.setProperty(TeradataBinarySerde.TD_TIMESTAMP_PRECISION, "0");
+ serde.initialize(null, props);
+
+ //2012-10-01 12:00:00
+ BytesWritable in =
+ new BytesWritable(BaseEncoding.base16().lowerCase().decode("00323031322d31302d30312031323a30303a3030"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Timestamp ts = ((TimestampWritableV2) row.get(0)).getTimestamp();
+ Assert.assertEquals(ts.getYear(), 2012);
+ Assert.assertEquals(ts.getMonth(), 10);
+ Assert.assertEquals(ts.getDay(), 1);
+ Assert.assertEquals(ts.getHours(), 12);
+ Assert.assertEquals(ts.getMinutes(), 0);
+ Assert.assertEquals(ts.getSeconds(), 0);
+ Assert.assertEquals(ts.getNanos(), 0);
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testTimestampPrecision3() throws Exception {
+ props.setProperty(TeradataBinarySerde.TD_TIMESTAMP_PRECISION, "3");
+ serde.initialize(null, props);
+
+ //2012-10-01 12:00:00.345
+ BytesWritable in =
+ new BytesWritable(BaseEncoding.base16().lowerCase().decode("00323031322d31302d30312031323a30303a30302e333435"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Timestamp ts = ((TimestampWritableV2) row.get(0)).getTimestamp();
+ Assert.assertEquals(ts.getYear(), 2012);
+ Assert.assertEquals(ts.getMonth(), 10);
+ Assert.assertEquals(ts.getDay(), 1);
+ Assert.assertEquals(ts.getHours(), 12);
+ Assert.assertEquals(ts.getMinutes(), 0);
+ Assert.assertEquals(ts.getSeconds(), 0);
+ Assert.assertEquals(ts.getNanos(), 345000000);
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+}
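Unlike the integral types, TIMESTAMP is not a packed binary value: the fixtures above are the ASCII text of the timestamp, occupying 19 bytes for precision 0 and 19 + 1 + p bytes for fractional precision p, which is exactly what getTimeStampByteNum computes. A small standalone check (not part of the patch):

    import java.nio.charset.StandardCharsets;

    public class TeradataTimestampWidthCheck {
      // "yyyy-mm-dd hh:mm:ss" is 19 bytes; a fractional part adds '.'
      // plus one digit per unit of precision.
      static int byteNum(int precision) {
        return precision == 0 ? 19 : 19 + 1 + precision;
      }

      public static void main(String[] args) {
        byte[] payload = "2012-10-01 12:00:00.110000".getBytes(StandardCharsets.US_ASCII);
        System.out.println(payload.length); // 26
        System.out.println(byteNum(6));     // 26, matching the precision-6 fixture
        System.out.println(byteNum(0));     // 19, matching the precision-0 fixture
        System.out.println(byteNum(3));     // 23, matching the precision-3 fixture
      }
    }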
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeGeneral.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeGeneral.java b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeGeneral.java
new file mode 100644
index 0000000..c50ef70
--- /dev/null
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/teradata/TestTeradataBinarySerdeGeneral.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.teradata;
+
+import com.google.common.io.BaseEncoding;
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.junit.Assert;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Test all the data types supported for Teradata Binary Format.
+ */
+public class TestTeradataBinarySerdeGeneral extends TestCase {
+
+ private final TeradataBinarySerde serde = new TeradataBinarySerde();
+ private final Properties props = new Properties();
+
+ protected void setUp() throws Exception {
+ props.setProperty(serdeConstants.LIST_COLUMNS,
+ "TD_CHAR, TD_VARCHAR, TD_BIGINT, TD_INT, TD_SMALLINT, TD_BYTEINT, "
+ + "TD_FLOAT,TD_DECIMAL,TD_DATE, TD_TIMESTAMP, TD_VARBYTE");
+ props.setProperty(serdeConstants.LIST_COLUMN_TYPES,
+ "char(3),varchar(100),bigint,int,smallint,tinyint,double,decimal(31,30),date,timestamp,binary");
+
+ serde.initialize(null, props);
+ }
+
+ public void testDeserializeAndSerialize() throws Exception {
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode(
+ "00004e6f762020202020201b006120646179203d2031312f31312f31312020202020202020203435ec10000000000000c5feffff"
+ + "7707010000000000002a40ef2b3dab0d14e6531c8908a72700000007b20100313931312d31312d31312031393a32303a32312e34"
+ + "33333230301b00746573743a20202020202020343333322020202020202020333135"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Assert.assertEquals("Nov", ((HiveCharWritable) row.get(0)).toString());
+ Assert.assertEquals("a day = 11/11/11 45", ((HiveVarcharWritable) row.get(1)).toString());
+ Assert.assertEquals(4332L, ((LongWritable) row.get(2)).get());
+ Assert.assertEquals(-315, ((IntWritable) row.get(3)).get());
+ Assert.assertEquals((short) 1911, ((ShortWritable) row.get(4)).get());
+ Assert.assertEquals((byte) 1, ((ByteWritable) row.get(5)).get());
+ Assert.assertEquals((double) 13, ((DoubleWritable) row.get(6)).get(), 0);
+ Assert.assertEquals(30, ((HiveDecimalWritable) row.get(7)).getScale());
+ Assert.assertEquals((double) 3.141592653589793238462643383279,
+ ((HiveDecimalWritable) row.get(7)).getHiveDecimal().doubleValue(), 0);
+ Assert.assertEquals("1911-11-11", ((DateWritableV2) row.get(8)).toString());
+ Assert.assertEquals("1911-11-11 19:20:21.4332", ((TimestampWritableV2) row.get(9)).toString());
+ Assert.assertEquals(27, ((BytesWritable) row.get(10)).getLength());
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testDeserializeAndSerializeWithNull() throws Exception {
+ //null bitmap: 0160 -> 00000001 01100000, so fields 7, 9 and 10 (0-indexed) are null
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode(
+ "01604d61722020202020201b006120646179203d2031332f30332f303820202020202020202020397ca10000000000004300000"
+ + "0dd0700000000000048834000000000000000000000000000000000443f110020202020202020202020202020202020202020202"
+ + "020202020200000"));
+ List<Object> row = (List<Object>) serde.deserialize(in);
+
+ Assert.assertEquals("Mar", ((HiveCharWritable) row.get(0)).toString());
+ Assert.assertEquals(null, row.get(7));
+ Assert.assertEquals(null, row.get(9));
+ Assert.assertEquals(null, row.get(10));
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testDeserializeAndSerializeAllNull() throws Exception {
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode(
+ "ffe0202020202020202020000000000000000000000000000000000000000000000000000000000000000000000000000000000"
+ + "00000000020202020202020202020202020202020202020202020202020200000"));
+ List<Object> row = (List<Object>) serde.deserialize(in);
+
+ Assert.assertEquals(null, row.get(0));
+ Assert.assertEquals(null, row.get(1));
+ Assert.assertEquals(null, row.get(3));
+ Assert.assertEquals(null, row.get(4));
+ Assert.assertEquals(null, row.get(5));
+ Assert.assertEquals(null, row.get(6));
+ Assert.assertEquals(null, row.get(7));
+ Assert.assertEquals(null, row.get(8));
+ Assert.assertEquals(null, row.get(9));
+ Assert.assertEquals(null, row.get(10));
+
+ BytesWritable res = (BytesWritable) serde.serialize(row, serde.getObjectInspector());
+ Assert.assertTrue(Arrays.equals(in.copyBytes(), res.copyBytes()));
+ }
+
+ public void testDeserializeCorruptedRecord() throws Exception {
+ BytesWritable in = new BytesWritable(BaseEncoding.base16().lowerCase().decode(
+ "00004e6f762020202020201b006120646179203d2031312f31312f31312020202020202020203435ec10000000000000c5feff"
+ + "ff7707010000000000002a40ef2b3dab0d14e6531c8908a72700000007b20100313931312d31312d31312031393a32303a32312"
+ + "e3433333230301b00746573743a20202020202020343333322020202020202020333135ff"));
+
+ List<Object> row = (List<Object>) serde.deserialize(in);
+ Assert.assertEquals(null, row.get(0));
+ Assert.assertEquals(null, row.get(3));
+ Assert.assertEquals(null, row.get(10));
+ }
+}
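The VARCHAR field in the records above is framed as a 2-byte little-endian length prefix followed by the character bytes ("1b00" in the first fixture, i.e. a 27-byte payload). A sketch of that framing under those assumptions; the helper names are ours, not part of the patch:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    public class VarcharFramingSketch {
      // 2-byte little-endian length prefix, then the character bytes.
      static byte[] frame(String s) throws IOException {
        byte[] chars = s.getBytes(StandardCharsets.US_ASCII);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        out.write(chars.length & 0xff);        // low byte first (little endian)
        out.write((chars.length >> 8) & 0xff); // high byte
        out.write(chars);
        return out.toByteArray();
      }

      public static void main(String[] args) throws IOException {
        byte[] framed = frame("hello");
        System.out.printf("%02x %02x%n", framed[0], framed[1]); // 05 00
        // The first fixture above carries the prefix 1b 00, i.e. a 27-byte varchar payload.
      }
    }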
[2/2] hive git commit: HIVE-20225: SerDe to support Teradata Binary
Format (Lu Li via cws)
Posted by cw...@apache.org.
HIVE-20225: SerDe to support Teradata Binary Format (Lu Li via cws)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b8d82844
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b8d82844
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b8d82844
Branch: refs/heads/master
Commit: b8d82844b9743d7a35dcc7fe6c702486fc4a9d72
Parents: cf5486d
Author: Carl Steinbach <cw...@apache.org>
Authored: Wed Aug 29 13:30:34 2018 -0700
Committer: Carl Steinbach <cw...@apache.org>
Committed: Wed Aug 29 13:50:56 2018 -0700
----------------------------------------------------------------------
.../ql/io/TeradataBinaryFileInputFormat.java | 66 ++
.../ql/io/TeradataBinaryFileOutputFormat.java | 112 ++++
.../hive/ql/io/TeradataBinaryRecordReader.java | 280 +++++++++
.../clientpositive/test_teradatabinaryfile.q | 123 ++++
.../test_teradatabinaryfile.q.out | 537 +++++++++++++++++
.../teradata/TeradataBinaryDataInputStream.java | 199 +++++++
.../TeradataBinaryDataOutputStream.java | 270 +++++++++
.../serde2/teradata/TeradataBinarySerde.java | 597 +++++++++++++++++++
.../TestTeradataBinarySerdeForDate.java | 76 +++
.../TestTeradataBinarySerdeForDecimal.java | 106 ++++
.../TestTeradataBinarySerdeForTimeStamp.java | 111 ++++
.../TestTeradataBinarySerdeGeneral.java | 133 +++++
12 files changed, 2610 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryFileInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryFileInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryFileInputFormat.java
new file mode 100644
index 0000000..bed87c5
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryFileInputFormat.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * https://cwiki.apache.org/confluence/display/Hive/TeradataBinarySerde.
+ * FileInputFormat for Teradata binary files.
+ *
+ * In the Teradata Binary File, each record is constructed as follows:
+ * The first 2 bytes represent the length of the bytes that follow for this record.
+ * Next comes the null bitmap, whose length depends on the number of fields.
+ * Then each field of the record is serialized into bytes - the serialization strategy is decided by the type of field.
+ * Finally, there is one byte (0x0a) at the end of the record.
+ *
+ * This InputFormat currently doesn't support splitting the file.
+ * Teradata binary files use little-endian byte order.
+ */
+public class TeradataBinaryFileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {
+
+ @Override public RecordReader<NullWritable, BytesWritable> getRecordReader(InputSplit split, JobConf job,
+ Reporter reporter) throws IOException {
+ reporter.setStatus(split.toString());
+ return new TeradataBinaryRecordReader(job, (FileSplit) split);
+ }
+
+ /**
+ * The <code>TeradataBinaryFileInputFormat</code> is not splittable right now, so
+ * the <code>isSplitable</code> function is overridden to return false.
+ *
+ * @param fs the file system that the file is on
+ * @param filename the file name to check
+ * @return whether this file is splitable (always false)
+ */
+ @Override protected boolean isSplitable(FileSystem fs, Path filename) {
+ return false;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryFileOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryFileOutputFormat.java
new file mode 100644
index 0000000..0469825
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryFileOutputFormat.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Properties;
+
+import org.apache.commons.io.EndianUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.Progressable;
+
+import static java.lang.String.format;
+
+/**
+ * https://cwiki.apache.org/confluence/display/Hive/TeradataBinarySerde.
+ * FileOutputFormat for Teradata binary files.
+ *
+ * In the Teradata Binary File, each record is constructed as follows:
+ * The first 2 bytes represent the length of the bytes that follow for this record (null bitmap and fields).
+ * Next comes the null bitmap, whose length depends on the number of fields.
+ * Then each field of the record is serialized into bytes - the serialization strategy is decided by the type of field.
+ * Finally, there is one byte (0x0a) at the end of the record.
+ *
+ * Teradata binary files use little-endian byte order.
+ */
+public class TeradataBinaryFileOutputFormat<K extends WritableComparable, V extends Writable>
+ extends HiveIgnoreKeyTextOutputFormat<K, V> {
+ private static final Log LOG = LogFactory.getLog(TeradataBinaryFileOutputFormat.class);
+
+ static final byte RECORD_END_BYTE = (byte) 0x0a;
+
+ /**
+ * Create the final output file and write rows one by one. After each row is
+ * appended, the record-end byte is appended as the row separator.
+ *
+ * @param jc
+ * the job configuration file
+ * @param outPath
+ * the final output file to be created
+ * @param valueClass
+ * the value class used for creation
+ * @param isCompressed
+ * whether the content is compressed or not
+ * @param tableProperties
+ * the tableProperties of this file's corresponding table
+ * @param progress
+ * progress used for status report
+ * @return the RecordWriter
+ */
+ @Override public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
+ boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
+ FileSystem fs = outPath.getFileSystem(jc);
+ final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath, progress), isCompressed);
+ return new RecordWriter() {
+ @Override public void write(Writable r) throws IOException {
+ BytesWritable bw = (BytesWritable) r;
+ int recordLength = bw.getLength();
+
+ // Based on the configured row length, decide whether the length prefix is an int or a short
+ String rowLength = tableProperties
+ .getProperty(TeradataBinaryRecordReader.TD_ROW_LENGTH, TeradataBinaryRecordReader.DEFAULT_TD_ROW_LENGTH)
+ .toLowerCase();
+ LOG.debug(format("The table property %s is: %s", TeradataBinaryRecordReader.TD_ROW_LENGTH, rowLength));
+
+ if (TeradataBinaryRecordReader.TD_ROW_LENGTH_TO_BYTE_NUM.containsKey(rowLength)) {
+ if (rowLength.equals(TeradataBinaryRecordReader.DEFAULT_TD_ROW_LENGTH)) {
+ EndianUtils.writeSwappedShort(outStream, (short) recordLength); // write the length using little endian
+ } else if (rowLength.equals(TeradataBinaryRecordReader.TD_ROW_LENGTH_1MB)) {
+ EndianUtils.writeSwappedInteger(outStream, recordLength); // write the length using little endian
+ }
+ } else {
+ throw new IllegalArgumentException(format("%s doesn't support the value %s, the supported values are %s",
+ TeradataBinaryRecordReader.TD_ROW_LENGTH, rowLength,
+ TeradataBinaryRecordReader.TD_ROW_LENGTH_TO_BYTE_NUM.keySet()));
+ }
+
+ outStream.write(bw.getBytes(), 0, bw.getLength()); // write the content (the content is in little endian)
+ outStream.write(RECORD_END_BYTE); //write the record ending
+ }
+
+ @Override public void close(boolean abort) throws IOException {
+ outStream.close();
+ }
+ };
+ }
+}
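Putting the pieces together, one on-disk record is: length prefix (2 bytes for "64kb" rows, 4 bytes for "1mb" rows, little endian), then the null bitmap and field bytes, then the 0x0a terminator. A minimal sketch of the default 64kb framing, assuming commons-io is on the classpath (EndianUtils.writeSwappedShort is the same call used above); class and helper names are ours:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import org.apache.commons.io.EndianUtils;

    public class RecordFramingSketch {
      static final byte RECORD_END_BYTE = (byte) 0x0a;

      // Frame one record payload (null bitmap + serialized fields) the way
      // TeradataBinaryFileOutputFormat does for the default "64kb" row length.
      static byte[] frame(byte[] payload) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        EndianUtils.writeSwappedShort(out, (short) payload.length); // little-endian length
        out.write(payload);
        out.write(RECORD_END_BYTE);
        return out.toByteArray();
      }

      public static void main(String[] args) throws IOException {
        byte[] framed = frame(new byte[] { 0x00, 0x01, 0x02 });
        for (byte b : framed) {
          System.out.printf("%02x ", b); // 03 00 00 01 02 0a
        }
      }
    }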
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryRecordReader.java
new file mode 100644
index 0000000..337b5d2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/TeradataBinaryRecordReader.java
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.io.EndianUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.Seekable;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+
+import static java.lang.String.format;
+
+/**
+ * The TeradataBinaryRecordReader reads the record from Teradata binary files.
+ *
+ * In the Teradata Binary File, each record is constructed as follows:
+ * The first 2 bytes represent the length of the bytes that follow for this record.
+ * Next comes the null bitmap, whose length depends on the number of fields.
+ * Then each field of the record is serialized into bytes - the serialization strategy is decided by the type of field.
+ * Finally, there is one byte (0x0a) at the end of the record.
+ *
+ * This record reader currently doesn't support splitting the file.
+ * Teradata binary files use little-endian byte order.
+ */
+public class TeradataBinaryRecordReader implements RecordReader<NullWritable, BytesWritable> {
+
+ private static final Log LOG = LogFactory.getLog(TeradataBinaryRecordReader.class);
+
+ private CompressionCodecFactory compressionCodecs = null;
+ private InputStream in;
+ private long start;
+ private long pos;
+ private long end;
+ private final Seekable filePosition;
+ private CompressionCodec codec;
+
+ static final String TD_ROW_LENGTH = "teradata.row.length";
+ static final Map<String, Integer> TD_ROW_LENGTH_TO_BYTE_NUM = ImmutableMap.of("64kb", 2, "1mb", 4);
+ static final String DEFAULT_TD_ROW_LENGTH = "64kb";
+ static final String TD_ROW_LENGTH_1MB = "1mb";
+
+ private byte[] recordLengthBytes;
+ private byte[] valueByteArray = new byte[65536]; // buffer holding the content of the current record
+ private byte[] endOfRecord = new byte[1];
+
+ private int recordLength = 0;
+
+ public TeradataBinaryRecordReader(JobConf job, FileSplit fileSplit) throws IOException {
+ LOG.debug("initialize the TeradataBinaryRecordReader");
+
+ String rowLength = job.get(TD_ROW_LENGTH);
+ if (rowLength == null) {
+ LOG.debug("No table property in JobConf. Try to recover the table directly");
+ Map<String, PartitionDesc> partitionDescMap = Utilities.getMapRedWork(job).getMapWork().getAliasToPartnInfo();
+ for (String alias : Utilities.getMapRedWork(job).getMapWork().getAliasToPartnInfo().keySet()) {
+ LOG.debug(format("the current alias: %s", alias));
+ rowLength = partitionDescMap.get(alias).getTableDesc().getProperties().getProperty(TD_ROW_LENGTH);
+ if (rowLength != null) {
+ break;
+ }
+ }
+ }
+
+ if (rowLength == null) {
+ rowLength = DEFAULT_TD_ROW_LENGTH;
+ } else {
+ rowLength = rowLength.toLowerCase();
+ }
+
+ if (TD_ROW_LENGTH_TO_BYTE_NUM.containsKey(rowLength)) {
+ recordLengthBytes = new byte[TD_ROW_LENGTH_TO_BYTE_NUM.get(rowLength)];
+ } else {
+ throw new IllegalArgumentException(
+ format("%s doesn't support the value %s, the supported values are %s", TD_ROW_LENGTH, rowLength,
+ TD_ROW_LENGTH_TO_BYTE_NUM.keySet()));
+ }
+
+ start = fileSplit.getStart();
+ end = start + fileSplit.getLength();
+
+ LOG.debug(format("The start of the file split is: %s", start));
+ LOG.debug(format("The end of the file split is: %s", end));
+
+ final Path file = fileSplit.getPath();
+ compressionCodecs = new CompressionCodecFactory(job);
+ codec = compressionCodecs.getCodec(file);
+ FileSystem fs = file.getFileSystem(job);
+ FSDataInputStream fileIn = fs.open(fileSplit.getPath());
+
+ /* currently the Teradata binary file doesn't support file splits at all */
+ filePosition = fileIn;
+ if (isCompressedInput()) {
+ LOG.info(format("Input file is compressed. Using compression code %s", codec.getClass().getName()));
+ in = codec.createInputStream(fileIn);
+ } else {
+ LOG.info("The input file is not compressed");
+ in = fileIn;
+ }
+ pos = start;
+ }
+
+ /**
+ * Reads the next key/value pair from the input for processing.
+ *
+ * @param key the key to read data into
+ * @param value the value to read data into
+ * @return true iff a key/value was read, false if at EOF
+ */
+ @Override public synchronized boolean next(NullWritable key, BytesWritable value) throws IOException {
+
+ /* read the record length */
+ int lengthExpected = recordLengthBytes.length;
+ int hasConsumed = readExpectedBytes(recordLengthBytes, lengthExpected);
+ if (hasConsumed == 0) {
+ LOG.info("Reach the End of File. No more record");
+ return false;
+ } else if (hasConsumed < lengthExpected) {
+ LOG.error(
+ format("We expect %s bytes for the record length but read %d byte and reach the End of File.", lengthExpected,
+ hasConsumed));
+ LOG.error(format("The current position in the file : %s", getFilePosition()));
+ LOG.error(format("The current consumed bytes: %s", pos));
+ LOG.error(format("The bytes for the current record is: %s", Hex.encodeHexString(recordLengthBytes)));
+ throw new EOFException("When reading the record length, reach the unexpected end of file.");
+ }
+ /* get the record content length to prepare to read the content */
+ recordLength = EndianUtils.readSwappedUnsignedShort(recordLengthBytes, 0);
+ pos += lengthExpected;
+
+ /* read the record content */
+ lengthExpected = recordLength;
+ hasConsumed = readExpectedBytes(valueByteArray, lengthExpected);
+ if (hasConsumed < lengthExpected) {
+ LOG.error(format("We expect %s bytes for the record content but read %d byte and reach the End of File.",
+ lengthExpected, hasConsumed));
+ LOG.error(format("The current position in the file : %s", getFilePosition()));
+ LOG.error(format("The current consumed bytes: %s", pos));
+ LOG.error(format("The bytes for the current record is: %s",
+ Hex.encodeHexString(recordLengthBytes) + Hex.encodeHexString(valueByteArray)));
+ throw new EOFException("When reading the contend of the record, reach the unexpected end of file.");
+ }
+ value.set(valueByteArray, 0, recordLength);
+ pos += lengthExpected;
+
+ /* read the record end */
+ lengthExpected = endOfRecord.length;
+ hasConsumed = readExpectedBytes(endOfRecord, lengthExpected);
+ if (hasConsumed < lengthExpected) {
+ LOG.error(format("We expect %s bytes for the record end symbol but read %d byte and reach the End of File.",
+ lengthExpected, hasConsumed));
+ LOG.error(format("The current position in the file : %s", getFilePosition()));
+ LOG.error(format("The current consumed bytes: %s", pos));
+ LOG.error(format("The bytes for the current record is: %s",
+ Hex.encodeHexString(recordLengthBytes) + Hex.encodeHexString(valueByteArray) + Hex
+ .encodeHexString(endOfRecord)));
+ throw new EOFException("When reading the end of record, reach the unexpected end of file.");
+ }
+
+ if (endOfRecord[0] != TeradataBinaryFileOutputFormat.RECORD_END_BYTE) {
+ throw new IOException(format("Expected 0x0a as the record end but got %s.", Hex.encodeHexString(endOfRecord)));
+ }
+ pos += lengthExpected;
+
+ return true;
+ }
+
+ /**
+ * Create an object of the appropriate type to be used as a key.
+ *
+ * @return a new key object.
+ */
+ @Override public NullWritable createKey() {
+ return NullWritable.get();
+ }
+
+ /**
+ * Create an object of the appropriate type to be used as a value.
+ *
+ * @return a new value object.
+ */
+ @Override public BytesWritable createValue() {
+ return new BytesWritable();
+ }
+
+ /**
+ * Returns the current position in the input.
+ *
+ * @return the current position in the input.
+ * @throws IOException
+ */
+ @Override public long getPos() throws IOException {
+ return pos;
+ }
+
+ /**
+ * Closes the underlying input stream.
+ * @throws IOException
+ */
+ @Override public void close() throws IOException {
+ if (in != null) {
+ in.close();
+ }
+ }
+
+ /**
+ * How much of the input has the {@link RecordReader} consumed,
+ * i.e. been processed?
+ *
+ * @return progress from <code>0.0</code> to <code>1.0</code>.
+ * @throws IOException
+ */
+ @Override public float getProgress() throws IOException {
+ if (start == end) {
+ return 0.0F;
+ } else {
+ return Math.min(1.0F, (float) (getFilePosition() - start) / (float) (end - start));
+ }
+ }
+
+ private boolean isCompressedInput() {
+ return codec != null;
+ }
+
+ private synchronized long getFilePosition() throws IOException {
+ long retVal;
+ if (isCompressedInput() && filePosition != null) {
+ retVal = filePosition.getPos();
+ } else {
+ retVal = getPos();
+ }
+ return retVal;
+ }
+
+ private synchronized int readExpectedBytes(byte[] toWrite, int lengthExpected) throws IOException {
+ int curPos = 0;
+ do {
+ int numOfByteRead = in.read(toWrite, curPos, lengthExpected - curPos);
+ if (numOfByteRead < 0) {
+ return curPos;
+ } else {
+ curPos += numOfByteRead;
+ }
+ } while (curPos < lengthExpected);
+ return curPos;
+ }
+}
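
The per-record loop in next() above can be summarized by this minimal sketch. It is
not part of this patch and assumes the default 64KB layout (2-byte length prefix)
over an uncompressed stream; the real reader also tracks the byte position and
treats EOF before the length prefix as the normal end of input:

    import java.io.DataInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import org.apache.commons.io.EndianUtils;

    public class RecordReadSketch {
      static byte[] readRecord(InputStream in) throws IOException {
        DataInputStream din = new DataInputStream(in);
        byte[] lengthBytes = new byte[2];
        din.readFully(lengthBytes);                      // 2-byte little-endian record length
        int recordLength = EndianUtils.readSwappedUnsignedShort(lengthBytes, 0);
        byte[] payload = new byte[recordLength];
        din.readFully(payload);                          // null bitmap + serialized fields
        if (din.read() != 0x0a) {                        // verify the record terminator
          throw new IOException("expected 0x0a record terminator");
        }
        return payload;
      }
    }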
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/ql/src/test/queries/clientpositive/test_teradatabinaryfile.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/test_teradatabinaryfile.q b/ql/src/test/queries/clientpositive/test_teradatabinaryfile.q
new file mode 100644
index 0000000..33ab677
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/test_teradatabinaryfile.q
@@ -0,0 +1,123 @@
+DROP TABLE if exists teradata_binary_table_64kb;
+DROP TABLE if exists teradata_binary_table_1mb;
+DROP TABLE if exists teradata_binary_table_64kb_insert;
+DROP TABLE if exists teradata_binary_table_1mb_insert;
+
+
+CREATE TABLE `teradata_binary_table_64kb`(
+ `test_tinyint` tinyint,
+ `test_smallint` smallint,
+ `test_int` int,
+ `test_bigint` bigint,
+ `test_double` double,
+ `test_decimal` decimal(15,2),
+ `test_date` date,
+ `test_timestamp` timestamp,
+ `test_char` char(1),
+ `test_varchar` varchar(40),
+ `test_binary` binary
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='0',
+ 'teradata.char.charset'='LATIN',
+ 'teradata.row.length'='64KB'
+);
+
+CREATE TABLE `teradata_binary_table_1mb`(
+ `test_tinyint` tinyint,
+ `test_smallint` smallint,
+ `test_int` int,
+ `test_bigint` bigint,
+ `test_double` double,
+ `test_decimal` decimal(15,2),
+ `test_date` date,
+ `test_timestamp` timestamp,
+ `test_char` char(1),
+ `test_varchar` varchar(40),
+ `test_binary` binary
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='6',
+ 'teradata.char.charset'='UNICODE',
+ 'teradata.row.length'='1MB'
+);
+
+CREATE TABLE `teradata_binary_table_64kb_insert`(
+ `test_tinyint` tinyint,
+ `test_decimal` decimal(15,2),
+ `test_date` date,
+ `test_timestamp` timestamp
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='0',
+ 'teradata.char.charset'='LATIN',
+ 'teradata.row.length'='64KB'
+);
+
+CREATE TABLE `teradata_binary_table_1mb_insert`(
+ `test_tinyint` tinyint,
+ `test_int` int
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='6',
+ 'teradata.char.charset'='UNICODE',
+ 'teradata.row.length'='1MB'
+);
+
+LOAD DATA LOCAL INPATH '../../data/files/teradata_binary_file/teradata_binary_table.deflate' OVERWRITE INTO TABLE teradata_binary_table_64kb;
+LOAD DATA LOCAL INPATH '../../data/files/teradata_binary_file/td_data_with_1mb_rowsize.teradata.gz' OVERWRITE INTO TABLE teradata_binary_table_1mb;
+
+SELECT * from teradata_binary_table_64kb;
+SELECT * from teradata_binary_table_1mb;
+
+SELECT COUNT(*) FROM teradata_binary_table_64kb;
+SELECT COUNT(*) FROM teradata_binary_table_1mb;
+
+SELECT max(date_format(test_timestamp, 'y')) FROM teradata_binary_table_64kb;
+SELECT max(date_format(test_date, 'y')) FROM teradata_binary_table_64kb;
+SELECT max(Floor(test_decimal)) FROM teradata_binary_table_64kb;
+
+SELECT max(date_format(test_timestamp, 'y')) FROM teradata_binary_table_1mb;
+SELECT max(date_format(test_date, 'y')) FROM teradata_binary_table_1mb;
+SELECT max(Floor(test_decimal)) FROM teradata_binary_table_1mb;
+
+SELECT test_tinyint, MAX(test_decimal) FROM teradata_binary_table_64kb GROUP BY test_tinyint;
+SELECT test_tinyint, MAX(test_decimal) FROM teradata_binary_table_1mb GROUP BY test_tinyint;
+
+INSERT OVERWRITE TABLE teradata_binary_table_64kb_insert
+SELECT test_tinyint, test_decimal, test_date, test_timestamp FROM teradata_binary_table_64kb;
+
+INSERT OVERWRITE TABLE teradata_binary_table_1mb_insert
+SELECT 1, 15;
+
+DESC FORMATTED teradata_binary_table_64kb_insert;
+DESC FORMATTED teradata_binary_table_1mb_insert;
+
+DROP TABLE if exists teradata_binary_table_64kb;
+DROP TABLE if exists teradata_binary_table_1mb;
+DROP TABLE if exists teradata_binary_table_64kb_insert;
+DROP TABLE if exists teradata_binary_table_1mb_insert;
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/ql/src/test/results/clientpositive/test_teradatabinaryfile.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/test_teradatabinaryfile.q.out b/ql/src/test/results/clientpositive/test_teradatabinaryfile.q.out
new file mode 100644
index 0000000..9db1372
--- /dev/null
+++ b/ql/src/test/results/clientpositive/test_teradatabinaryfile.q.out
@@ -0,0 +1,537 @@
+PREHOOK: query: DROP TABLE if exists teradata_binary_table_64kb
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE if exists teradata_binary_table_64kb
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE if exists teradata_binary_table_1mb
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE if exists teradata_binary_table_1mb
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE if exists teradata_binary_table_64kb_insert
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE if exists teradata_binary_table_64kb_insert
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE if exists teradata_binary_table_1mb_insert
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE if exists teradata_binary_table_1mb_insert
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE `teradata_binary_table_64kb`(
+ `test_tinyint` tinyint,
+ `test_smallint` smallint,
+ `test_int` int,
+ `test_bigint` bigint,
+ `test_double` double,
+ `test_decimal` decimal(15,2),
+ `test_date` date,
+ `test_timestamp` timestamp,
+ `test_char` char(1),
+ `test_varchar` varchar(40),
+ `test_binary` binary
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='0',
+ 'teradata.char.charset'='LATIN',
+ 'teradata.row.length'='64KB'
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@teradata_binary_table_64kb
+POSTHOOK: query: CREATE TABLE `teradata_binary_table_64kb`(
+ `test_tinyint` tinyint,
+ `test_smallint` smallint,
+ `test_int` int,
+ `test_bigint` bigint,
+ `test_double` double,
+ `test_decimal` decimal(15,2),
+ `test_date` date,
+ `test_timestamp` timestamp,
+ `test_char` char(1),
+ `test_varchar` varchar(40),
+ `test_binary` binary
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='0',
+ 'teradata.char.charset'='LATIN',
+ 'teradata.row.length'='64KB'
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@teradata_binary_table_64kb
+PREHOOK: query: CREATE TABLE `teradata_binary_table_1mb`(
+ `test_tinyint` tinyint,
+ `test_smallint` smallint,
+ `test_int` int,
+ `test_bigint` bigint,
+ `test_double` double,
+ `test_decimal` decimal(15,2),
+ `test_date` date,
+ `test_timestamp` timestamp,
+ `test_char` char(1),
+ `test_varchar` varchar(40),
+ `test_binary` binary
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='6',
+ 'teradata.char.charset'='UNICODE',
+ 'teradata.row.length'='1MB'
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@teradata_binary_table_1mb
+POSTHOOK: query: CREATE TABLE `teradata_binary_table_1mb`(
+ `test_tinyint` tinyint,
+ `test_smallint` smallint,
+ `test_int` int,
+ `test_bigint` bigint,
+ `test_double` double,
+ `test_decimal` decimal(15,2),
+ `test_date` date,
+ `test_timestamp` timestamp,
+ `test_char` char(1),
+ `test_varchar` varchar(40),
+ `test_binary` binary
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='6',
+ 'teradata.char.charset'='UNICODE',
+ 'teradata.row.length'='1MB'
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@teradata_binary_table_1mb
+PREHOOK: query: CREATE TABLE `teradata_binary_table_64kb_insert`(
+ `test_tinyint` tinyint,
+ `test_decimal` decimal(15,2),
+ `test_date` date,
+ `test_timestamp` timestamp
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='0',
+ 'teradata.char.charset'='LATIN',
+ 'teradata.row.length'='64KB'
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@teradata_binary_table_64kb_insert
+POSTHOOK: query: CREATE TABLE `teradata_binary_table_64kb_insert`(
+ `test_tinyint` tinyint,
+ `test_decimal` decimal(15,2),
+ `test_date` date,
+ `test_timestamp` timestamp
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='0',
+ 'teradata.char.charset'='LATIN',
+ 'teradata.row.length'='64KB'
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@teradata_binary_table_64kb_insert
+PREHOOK: query: CREATE TABLE `teradata_binary_table_1mb_insert`(
+ `test_tinyint` tinyint,
+ `test_int` int
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='6',
+ 'teradata.char.charset'='UNICODE',
+ 'teradata.row.length'='1MB'
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@teradata_binary_table_1mb_insert
+POSTHOOK: query: CREATE TABLE `teradata_binary_table_1mb_insert`(
+ `test_tinyint` tinyint,
+ `test_int` int
+ )
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat'
+TBLPROPERTIES (
+ 'teradata.timestamp.precision'='6',
+ 'teradata.char.charset'='UNICODE',
+ 'teradata.row.length'='1MB'
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@teradata_binary_table_1mb_insert
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/teradata_binary_file/teradata_binary_table.deflate' OVERWRITE INTO TABLE teradata_binary_table_64kb
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@teradata_binary_table_64kb
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/teradata_binary_file/teradata_binary_table.deflate' OVERWRITE INTO TABLE teradata_binary_table_64kb
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@teradata_binary_table_64kb
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/teradata_binary_file/td_data_with_1mb_rowsize.teradata.gz' OVERWRITE INTO TABLE teradata_binary_table_1mb
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@teradata_binary_table_1mb
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/teradata_binary_file/td_data_with_1mb_rowsize.teradata.gz' OVERWRITE INTO TABLE teradata_binary_table_1mb
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@teradata_binary_table_1mb
+PREHOOK: query: SELECT * from teradata_binary_table_64kb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * from teradata_binary_table_64kb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+10 34 139997714 32307660 18.6717 59.99 2018-08-23 2018-07-23 01:45:55 A NULL NULL
+10 28 89082024 53367308 5.9069 27.90 2018-08-23 2018-07-23 19:45:36 A NULL NULL
+10 31 65499801 9495835 5.9064 29.99 2018-08-23 2018-07-23 09:15:10 A NULL NULL
+10 20 144923884 123337561 20.1037 50.50 2018-08-23 2018-07-23 22:49:52 A NULL NULL
+10 9 118474716 110462827 18.6697 29.99 2018-08-23 2018-07-23 10:13:03 A NULL NULL
+10 4 116098596 555556155 20.1017 29.99 2018-07-23 2018-07-23 13:12:10 X SELF_SERVICE SELF_SERVICE
+10 10 84492975 100052093 15.4913 29.99 2018-08-23 2018-07-23 17:56:32 A NULL NULL
+10 31 101314613 45413087 5.9064 29.99 2018-08-23 2018-07-23 11:26:24 A NULL NULL
+10 1 156962113 554297748 NULL 29.99 2018-08-23 2018-07-23 11:31:31 A NULL NULL
+10 10 92560875 380929783 20.1011 20.91 2018-07-30 2018-07-23 05:02:42 S RCHARGE_FAILURE RCHARGE_FAILURE
+10 5 154490193 186062438 20.1037 29.99 2018-07-23 2018-07-23 10:17:20 X NULL NULL
+10 31 2954435 34009387 0.0214 24.23 2018-08-23 2018-07-23 15:46:21 A NULL NULL
+10 4 156942563 55362740 0.0024 29.99 2018-08-23 2018-07-23 08:16:49 A NULL NULL
+10 31 90527523 126581551 7.5689 59.99 2018-08-23 2018-07-23 03:40:28 A NULL NULL
+10 1 118477496 598803186 NULL 29.99 2018-08-23 2018-07-23 10:45:28 A NULL NULL
+10 75 137653654 38440942 20.1037 29.99 2018-08-23 2018-07-23 19:01:04 A NULL NULL
+10 2 142697304 106829658 20.1008 24.21 2018-07-23 2018-07-23 05:22:17 S RCHARGE_FAILURE RCHARGE_FAILURE
+10 14 134043823 264156349 20.1008 24.21 2018-08-23 2018-07-23 12:12:48 A NULL NULL
+10 7 91359485 7008957 20.1011 20.91 2018-08-23 2018-07-23 23:42:04 A NULL NULL
+10 1 118512426 222159750 NULL 29.99 2018-08-23 2018-07-23 17:06:25 A NULL NULL
+10 5 155168873 135968937 18.6697 59.99 2018-07-30 2018-07-23 18:01:35 S RCHARGE_FAILURE RCHARGE_FAILURE
+10 4 151084943 38355275 20.1017 29.99 2018-08-23 2018-07-23 04:12:32 A NULL NULL
+10 6 118452556 90264779 20.1017 59.99 2018-08-23 2018-07-23 05:18:44 A NULL NULL
+10 31 53127101 18622653 0.0115 49.95 2018-08-23 2018-07-23 07:38:05 A NULL NULL
+10 1 118479736 216825119 NULL 29.99 2018-08-23 2018-07-23 11:11:51 A NULL NULL
+10 4 142708764 21984202 30.5785 27.50 2018-08-23 2018-07-23 10:36:22 A NULL NULL
+10 4 142713364 33598449 20.1017 29.99 2018-07-23 2018-07-23 12:49:24 X SELF_SERVICE SELF_SERVICE
+10 22 103578546 152144452 20.1017 29.99 2018-08-23 2018-07-23 11:18:44 A NULL NULL
+10 22 111233194 69051 20.1017 29.99 2018-08-23 2018-07-23 08:58:16 A NULL NULL
+10 12 132376034 2651098 20.1017 29.99 2018-08-23 2018-07-23 06:01:44 A NULL NULL
+10 11 135778714 29866847 18.6717 59.99 2018-08-23 2018-07-23 02:35:58 A NULL NULL
+10 10 118525066 34556421 5.9064 29.99 2018-08-23 2018-07-23 21:15:29 A NULL NULL
+10 7 144897784 532208226 20.1017 29.99 2018-08-23 2018-07-23 14:35:42 A NULL NULL
+10 34 87091713 93626084 5.9064 29.99 2018-08-23 2018-07-23 08:56:25 A NULL NULL
+10 21 129323704 14298869 30.5516 55.03 2018-08-23 2018-07-23 05:48:14 A NULL NULL
+10 31 112813163 36762074 5.9064 29.99 2018-08-23 2018-07-23 18:07:23 A NULL NULL
+10 1 156980833 58308375 NULL 59.99 2018-08-23 2018-07-23 14:54:17 A NULL NULL
+10 5 150357953 101207194 20.1017 29.99 2018-08-14 2018-07-23 13:53:14 S NULL NULL
+10 1 118462836 668498576 NULL 55.03 2018-08-23 2018-07-23 07:44:11 A NULL NULL
+10 7 129423664 312394041 20.1017 29.99 2018-08-23 2018-07-23 20:40:42 A NULL NULL
+10 10 122518074 5448199 20.1017 29.99 2018-08-23 2018-07-23 01:30:03 A NULL NULL
+10 3 113469566 593079639 20.1037 29.99 2018-08-23 2018-07-23 19:39:05 A NULL NULL
+10 4 144878314 88960410 18.6689 55.03 2018-08-23 2018-07-23 11:43:56 A NULL NULL
+10 8 146831593 168164335 30.5786 28.03 2018-08-23 2018-07-23 11:34:36 A NULL NULL
+10 4 91358385 23752815 29.9896 27.21 2018-08-23 2018-07-23 23:20:30 A NULL NULL
+10 3 118533306 286487393 30.5529 44.02 2019-07-23 2018-07-23 23:48:14 A NULL NULL
+10 7 103618686 339052539 18.6697 59.99 2018-08-23 2018-07-23 18:26:54 A NULL NULL
+10 11 92556375 196464425 29.9896 27.21 2018-08-23 2018-07-23 03:15:07 A NULL NULL
+10 11 137563254 239883707 18.6697 59.99 2018-08-23 2018-07-23 02:01:31 A NULL NULL
+10 2 116078336 61997052 20.1017 29.99 2018-07-23 2018-07-23 00:55:05 X SELF_SERVICE SELF_SERVICE
+PREHOOK: query: SELECT * from teradata_binary_table_1mb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * from teradata_binary_table_1mb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+-6 0 -99999 -1 NULL 0.00 2011-01-02 2009-02-28 12:34:56 数 AABBCC
+5 3200 -9999 NULL 3.14159 314000000.00 NULL 2011-02-28 12:34:56 ABC NULL
+-127 32000 -9 1234567890123456789 2.01E10 3.14 2011-01-02 2022-02-28 12:34:56 数 ありがとうございます �7��c�
+-1 -32000 0 123456789012345678 2.0108E10 314.15 0001-12-31 NULL A thank you �7��c�
+127 32767 1 999000 2.034E12 0.04 2099-01-02 NULL I �7��c�
+2 -32767 9 987654321098765432 2.019876E12 NULL 2011-01-02 NULL あ test NULL
+3 32 99 -1234567890123456789 2.0E12 3140000000000.00 2999-12-31 0001-12-28 12:34:56 ? *** �7��c�
+-127 32000 100 1234567890123456789 2.01E10 3.14 2011-01-02 2022-02-28 12:34:56 数 ありがとうございます �7��c�
+-1 -32000 101 123456789012345678 2.0108E10 314.15 2009-09-09 NULL A thank you �7��c�
+127 32767 102 999000 2.034E12 0.04 2011-01-02 NULL I �7��c�
+2 -32767 103 987654321098765432 2.019876E12 NULL 2011-01-02 NULL あ test NULL
+3 32 104 -1234567890123456789 2.01E10 3.14 2011-01-02 0001-12-28 12:34:56 ? * �7��c�
+-4 320 105 0 2.01E10 3.14 2011-01-02 2010-02-28 12:34:56 NULL ||ありがとうございます|| �7��c�
+5 3200 106 NULL 3.14159 3.14 2011-01-02 2011-02-28 12:34:56 ABC NULL
+-6 0 107 -1 NULL 0.00 2011-01-02 2009-02-28 12:34:56 数 AABBCC
+7 NULL 108 65536 2.01E-8 NULL NULL 2099-02-28 12:34:56 数 NULL �7��c�
+NULL 1 109 256 1.01E18 12.00 2011-01-02 2999-12-31 12:34:56 数 NULL �7��c�
+-4 320 999 0 2.01E10 3.14 2011-01-02 2010-02-28 12:34:56 NULL ||ありがとうございます|| �7��c�
+NULL 1 1234 256 1.01E18 12.00 2000-01-02 2999-12-31 12:34:56 数 NULL �7��c�
+7 NULL 999999 65536 2.01E-8 NULL NULL 2099-02-28 12:34:56 数 NULL �7��c�
+PREHOOK: query: SELECT COUNT(*) FROM teradata_binary_table_64kb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*) FROM teradata_binary_table_64kb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+50
+PREHOOK: query: SELECT COUNT(*) FROM teradata_binary_table_1mb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*) FROM teradata_binary_table_1mb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+20
+PREHOOK: query: SELECT max(date_format(test_timestamp, 'y')) FROM teradata_binary_table_64kb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT max(date_format(test_timestamp, 'y')) FROM teradata_binary_table_64kb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+2018
+PREHOOK: query: SELECT max(date_format(test_date, 'y')) FROM teradata_binary_table_64kb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT max(date_format(test_date, 'y')) FROM teradata_binary_table_64kb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+2019
+PREHOOK: query: SELECT max(Floor(test_decimal)) FROM teradata_binary_table_64kb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT max(Floor(test_decimal)) FROM teradata_binary_table_64kb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+59
+PREHOOK: query: SELECT max(date_format(test_timestamp, 'y')) FROM teradata_binary_table_1mb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT max(date_format(test_timestamp, 'y')) FROM teradata_binary_table_1mb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+2999
+PREHOOK: query: SELECT max(date_format(test_date, 'y')) FROM teradata_binary_table_1mb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT max(date_format(test_date, 'y')) FROM teradata_binary_table_1mb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+2999
+PREHOOK: query: SELECT max(Floor(test_decimal)) FROM teradata_binary_table_1mb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT max(Floor(test_decimal)) FROM teradata_binary_table_1mb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+3140000000000
+PREHOOK: query: SELECT test_tinyint, MAX(test_decimal) FROM teradata_binary_table_64kb GROUP BY test_tinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT test_tinyint, MAX(test_decimal) FROM teradata_binary_table_64kb GROUP BY test_tinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_64kb
+#### A masked pattern was here ####
+10 59.99
+PREHOOK: query: SELECT test_tinyint, MAX(test_decimal) FROM teradata_binary_table_1mb GROUP BY test_tinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT test_tinyint, MAX(test_decimal) FROM teradata_binary_table_1mb GROUP BY test_tinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_1mb
+#### A masked pattern was here ####
+NULL 12.00
+-127 3.14
+-6 0.00
+-4 3.14
+-1 314.15
+2 NULL
+3 3140000000000.00
+5 314000000.00
+7 NULL
+127 0.04
+PREHOOK: query: INSERT OVERWRITE TABLE teradata_binary_table_64kb_insert
+SELECT test_tinyint, test_decimal, test_date, test_timestamp FROM teradata_binary_table_64kb
+PREHOOK: type: QUERY
+PREHOOK: Input: default@teradata_binary_table_64kb
+PREHOOK: Output: default@teradata_binary_table_64kb_insert
+POSTHOOK: query: INSERT OVERWRITE TABLE teradata_binary_table_64kb_insert
+SELECT test_tinyint, test_decimal, test_date, test_timestamp FROM teradata_binary_table_64kb
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@teradata_binary_table_64kb
+POSTHOOK: Output: default@teradata_binary_table_64kb_insert
+POSTHOOK: Lineage: teradata_binary_table_64kb_insert.test_date SIMPLE [(teradata_binary_table_64kb)teradata_binary_table_64kb.FieldSchema(name:test_date, type:date, comment:from deserializer), ]
+POSTHOOK: Lineage: teradata_binary_table_64kb_insert.test_decimal SIMPLE [(teradata_binary_table_64kb)teradata_binary_table_64kb.FieldSchema(name:test_decimal, type:decimal(15,2), comment:from deserializer), ]
+POSTHOOK: Lineage: teradata_binary_table_64kb_insert.test_timestamp SIMPLE [(teradata_binary_table_64kb)teradata_binary_table_64kb.FieldSchema(name:test_timestamp, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: teradata_binary_table_64kb_insert.test_tinyint SIMPLE [(teradata_binary_table_64kb)teradata_binary_table_64kb.FieldSchema(name:test_tinyint, type:tinyint, comment:from deserializer), ]
+PREHOOK: query: INSERT OVERWRITE TABLE teradata_binary_table_1mb_insert
+SELECT 1, 15
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@teradata_binary_table_1mb_insert
+POSTHOOK: query: INSERT OVERWRITE TABLE teradata_binary_table_1mb_insert
+SELECT 1, 15
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@teradata_binary_table_1mb_insert
+POSTHOOK: Lineage: teradata_binary_table_1mb_insert.test_int SIMPLE []
+POSTHOOK: Lineage: teradata_binary_table_1mb_insert.test_tinyint EXPRESSION []
+PREHOOK: query: DESC FORMATTED teradata_binary_table_64kb_insert
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@teradata_binary_table_64kb_insert
+POSTHOOK: query: DESC FORMATTED teradata_binary_table_64kb_insert
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@teradata_binary_table_64kb_insert
+# col_name data_type comment
+test_tinyint tinyint from deserializer
+test_decimal decimal(15,2) from deserializer
+test_date date from deserializer
+test_timestamp timestamp from deserializer
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ bucketing_version 2
+ numFiles 1
+ numRows 50
+ rawDataSize 0
+ teradata.char.charset LATIN
+ teradata.row.length 64KB
+ teradata.timestamp.precision 0
+ totalSize 1800
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde
+InputFormat: org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESC FORMATTED teradata_binary_table_1mb_insert
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@teradata_binary_table_1mb_insert
+POSTHOOK: query: DESC FORMATTED teradata_binary_table_1mb_insert
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@teradata_binary_table_1mb_insert
+# col_name data_type comment
+test_tinyint tinyint from deserializer
+test_int int from deserializer
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"test_int\":\"true\",\"test_tinyint\":\"true\"}}
+ bucketing_version 2
+ numFiles 1
+ numRows 1
+ rawDataSize 0
+ teradata.char.charset UNICODE
+ teradata.row.length 1MB
+ teradata.timestamp.precision 6
+ totalSize 11
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.teradata.TeradataBinarySerde
+InputFormat: org.apache.hadoop.hive.ql.io.TeradataBinaryFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.TeradataBinaryFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DROP TABLE if exists teradata_binary_table_64kb
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@teradata_binary_table_64kb
+PREHOOK: Output: default@teradata_binary_table_64kb
+POSTHOOK: query: DROP TABLE if exists teradata_binary_table_64kb
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@teradata_binary_table_64kb
+POSTHOOK: Output: default@teradata_binary_table_64kb
+PREHOOK: query: DROP TABLE if exists teradata_binary_table_1mb
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@teradata_binary_table_1mb
+PREHOOK: Output: default@teradata_binary_table_1mb
+POSTHOOK: query: DROP TABLE if exists teradata_binary_table_1mb
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@teradata_binary_table_1mb
+POSTHOOK: Output: default@teradata_binary_table_1mb
+PREHOOK: query: DROP TABLE if exists teradata_binary_table_64kb_insert
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@teradata_binary_table_64kb_insert
+PREHOOK: Output: default@teradata_binary_table_64kb_insert
+POSTHOOK: query: DROP TABLE if exists teradata_binary_table_64kb_insert
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@teradata_binary_table_64kb_insert
+POSTHOOK: Output: default@teradata_binary_table_64kb_insert
+PREHOOK: query: DROP TABLE if exists teradata_binary_table_1mb_insert
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@teradata_binary_table_1mb_insert
+PREHOOK: Output: default@teradata_binary_table_1mb_insert
+POSTHOOK: query: DROP TABLE if exists teradata_binary_table_1mb_insert
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@teradata_binary_table_1mb_insert
+POSTHOOK: Output: default@teradata_binary_table_1mb_insert
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataInputStream.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataInputStream.java b/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataInputStream.java
new file mode 100644
index 0000000..b26d342
--- /dev/null
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataInputStream.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.teradata;
+
+import org.apache.commons.io.input.SwappedDataInputStream;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.Timestamp;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.math.BigInteger;
+import java.text.ParseException;
+
+import static java.lang.String.format;
+
+/**
+ * The TeradataBinaryDataInputStream is used to handle the Teradata binary format input for records.
+ * Since the TD binary format uses little-endian for SHORT, INT, LONG, DOUBLE, etc.
+ * while Hadoop uses big-endian,
+ * we extend SwappedDataInputStream to handle these types, and extend it further to handle Teradata-specific
+ * types like VARCHAR, CHAR, TIMESTAMP, DATE...
+ */
+public class TeradataBinaryDataInputStream extends SwappedDataInputStream {
+
+ private static final int DATE_STRING_LENGTH = 8;
+
+ /**
+ * Instantiates a new Teradata binary data input stream.
+ *
+ * @param input the input
+ */
+ public TeradataBinaryDataInputStream(InputStream input) {
+ super(input);
+ }
+
+ /**
+ * Read VARCHAR(N).
+ * The representation of Varchar in Teradata binary format is:
+ * the first two bytes represent the length N of this varchar field,
+ * the next N bytes represent the content of this varchar field.
+ * A null varchar is represented by length 0 and no content.
+ *
+ * @return the string
+ * @throws IOException the io exception
+ */
+ public String readVarchar() throws IOException {
+ int varcharLength = readUnsignedShort();
+ byte[] varcharContent = new byte[varcharLength];
+ int numOfBytesRead = in.read(varcharContent);
+ if (varcharContent.length != 0 && numOfBytesRead != varcharLength) {
+ throw new EOFException(
+ format("Fail to read the varchar. Expect %d bytes, get %d bytes", varcharLength, numOfBytesRead));
+ }
+ // force it to be a UTF8 string
+ return new String(varcharContent, "UTF8");
+ }
+
+ /**
+ * Read TIMESTAMP(P).
+ * The representation of timestamp in Teradata binary format is:
+ * the number of bytes to read is based on the precision of the timestamp;
+ * each byte represents one char and the timestamp uses a string representation,
+ * eg: for TIMESTAMP(6), we need to read 26 bytes
+ * 31 39 31 31 2d 31 31 2d 31 31 20 31 39 3a 32 30 3a 32 31 2e 34 33 33 32 30 30
+ * will represent 1911-11-11 19:20:21.433200.
+ * A null timestamp is padded with spaces.
+ *
+ * @param byteNum the byte number that will be read from inputstream
+ * @return the timestamp
+ * @throws IOException the io exception
+ */
+ public Timestamp readTimestamp(Integer byteNum) throws IOException {
+ // yyyy-mm-dd hh:mm:ss
+ byte[] timestampContent = new byte[byteNum];
+ int numOfBytesRead = in.read(timestampContent);
+ if (timestampContent.length != 0 && numOfBytesRead != byteNum) {
+ throw new EOFException(
+ format("Fail to read the timestamp. Expect %d bytes, get %d bytes", byteNum, numOfBytesRead));
+ }
+ String timestampStr = new String(timestampContent, "UTF8");
+ if (timestampStr.trim().length() == 0) {
+ return null;
+ }
+ return Timestamp.valueOf(timestampStr);
+ }
+
+ /**
+ * Read DATE.
+ * The representation of date in Teradata binary format is:
+ * The date D is an int of 4 bytes using little endian,
+ * The representation is (D + 19000000).toString -> YYYYMMDD,
+ * e.g.: date 07 b2 01 00 -> 111111 in little endian -> 19111111 -> 1911.11.11.
+ * A null date is padded with 0.
+ *
+ * @return the date
+ * @throws IOException the io exception
+ * @throws ParseException the parse exception
+ */
+ public Date readDate() throws IOException, ParseException {
+ int di = readInt();
+ if (di == 0) {
+ return null;
+ }
+ String dateString = String.valueOf(di + 19000000);
+ if (dateString.length() < DATE_STRING_LENGTH) {
+ dateString = StringUtils.leftPad(dateString, DATE_STRING_LENGTH, '0');
+ }
+ Date date = new Date();
+ date.setYear(Integer.parseInt(dateString.substring(0, 4)));
+ date.setMonth(Integer.parseInt(dateString.substring(4, 6)));
+ date.setDayOfMonth(Integer.parseInt(dateString.substring(6, 8)));
+ return date;
+ }
+
+ /**
+ * Read CHAR(N).
+ * The representation of char in Teradata binary format is:
+ * the number of bytes to read is [charLength] * [bytePerChar] (totalLength);
+ * bytePerChar is decided by the charset: the LATIN charset is 2 bytes per char and the UNICODE charset is 3 bytes per char.
+ * A null char is padded with spaces.
+ *
+ * @param totalLength the total length
+ * @return the string
+ * @throws IOException the io exception
+ */
+ public String readChar(int totalLength) throws IOException {
+ byte[] charContent = new byte[totalLength];
+ int numOfBytesRead = in.read(charContent);
+ if (charContent.length != 0 && numOfBytesRead != totalLength) {
+ throw new EOFException(
+ format("Fail to read the varchar. Expect %d bytes, get %d bytes", totalLength, numOfBytesRead));
+ }
+ return new String(charContent, "UTF8");
+ }
+
+ /**
+ * Read DECIMAL(P, S).
+ * The representation of decimal in Teradata binary format is
+ * the number of bytes to read is decided solely by the precision (P);
+ * HiveDecimal is constructed through the byte array and scale.
+ * A null DECIMAL is padded with 0x00.
+ *
+ * @param scale the scale
+ * @param byteNum the byte num
+ * @return the hive decimal
+ * @throws IOException the io exception
+ */
+ public HiveDecimal readDecimal(int scale, int byteNum) throws IOException {
+ byte[] decimalContent = new byte[byteNum];
+ int numOfBytesRead = in.read(decimalContent);
+ if (decimalContent.length != 0 && numOfBytesRead != byteNum) {
+ throw new EOFException(
+ format("Fail to read the decimal. Expect %d bytes, get %d bytes", byteNum, numOfBytesRead));
+ }
+ ArrayUtils.reverse(decimalContent);
+ return HiveDecimal.create(new BigInteger(decimalContent), scale);
+ }
+
+ /**
+ * Read VARBYTE(N).
+ * The representation of VARBYTE in Teradata binary format is:
+ * the first two bytes represent the length N of this varbyte field,
+ * the next N bytes represent the content of this varbyte field.
+ * A null varbyte is represented by length 0 and no content.
+ *
+ * @return the byte [ ]
+ * @throws IOException the io exception
+ */
+ public byte[] readVarbyte() throws IOException {
+ int varbyteLength = readUnsignedShort();
+ byte[] varbyteContent = new byte[varbyteLength];
+ int numOfBytesRead = in.read(varbyteContent);
+ if (varbyteContent.length != 0 && numOfBytesRead != varbyteLength) {
+ throw new EOFException(
+ format("Fail to read the varbyte. Expect %d bytes, get %d bytes", varbyteLength, numOfBytesRead));
+ }
+ return varbyteContent;
+ }
+}
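
The DATE rule in readDate() above boils down to this small sketch (a hypothetical
helper, not part of this patch): the stored int is (YYYYMMDD - 19000000), so adding
19000000 back and left-padding to 8 digits recovers the calendar fields:

    import org.apache.commons.lang.StringUtils;

    public class DateDecodeSketch {
      /** Decodes a Teradata DATE int (already byte-swapped) into "YYYY-MM-DD"; 0 encodes NULL. */
      static String decode(int d) {
        if (d == 0) {
          return null;                                   // a null date is padded with 0
        }
        String s = StringUtils.leftPad(String.valueOf(d + 19000000), 8, '0');
        return s.substring(0, 4) + "-" + s.substring(4, 6) + "-" + s.substring(6, 8);
      }
      // decode(111111) yields "1911-11-11", matching the 07 b2 01 00 example above.
    }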
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d82844/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataOutputStream.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataOutputStream.java b/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataOutputStream.java
new file mode 100644
index 0000000..f2f801d
--- /dev/null
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/teradata/TeradataBinaryDataOutputStream.java
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.teradata;
+
+import org.apache.commons.io.EndianUtils;
+import org.apache.commons.io.output.ByteArrayOutputStream;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.Collections;
+
+import static java.lang.String.join;
+import static java.lang.String.format;
+
+
+/**
+ * The TeradataBinaryDataOutputStream is used to produce output compliant with the Teradata binary format,
+ * so the output can be loaded directly into a Teradata DB using TPT fastload.
+ * Since the TD binary format uses little-endian for SHORT, INT, LONG, DOUBLE, etc.
+ * while Hadoop uses big-endian,
+ * we extend ByteArrayOutputStream to write little-endian bytes for these types, and extend it further to handle
+ * Teradata-specific types like VARCHAR, CHAR, TIMESTAMP, DATE...
+ */
+public class TeradataBinaryDataOutputStream extends ByteArrayOutputStream {
+
+ private static final Log LOG = LogFactory.getLog(TeradataBinaryDataOutputStream.class);
+
+ private static final int TIMESTAMP_NO_NANOS_BYTE_NUM = 19;
+
+ public TeradataBinaryDataOutputStream() {
+ }
+
+ /**
+ * Write VARCHAR(N).
+ * The representation of Varchar in Teradata binary format is:
+ * the first two bytes represent the length N of this varchar field,
+ * the next N bytes represent the content of this varchar field.
+ * A null varchar is represented by length 0 and no content.
+ *
+ * @param writable the writable
+ * @throws IOException the io exception
+ */
+ public void writeVarChar(HiveVarcharWritable writable) throws IOException {
+ if (writable == null) {
+ EndianUtils.writeSwappedShort(this, (short) 0);
+ return;
+ }
+ Text t = writable.getTextValue();
+ int varcharLength = t.getLength();
+ EndianUtils.writeSwappedShort(this, (short) varcharLength); // write the varchar length
+ write(t.getBytes(), 0, varcharLength); // write the varchar content
+ }
+
+ /**
+ * Write INT.
+ * Using little-endian to write the int.
+ *
+ * @param i the int to write
+ * @throws IOException the io exception
+ */
+ public void writeInt(int i) throws IOException {
+ EndianUtils.writeSwappedInteger(this, i);
+ }
+
+ /**
+ * Write TIMESTAMP(N).
+ * The representation of timestamp in Teradata binary format is:
+ * the number of bytes to write is based on the precision of the timestamp;
+ * each byte represents one char and the timestamp uses a string representation,
+ * eg: for 1911-11-11 19:20:21.433200 in TIMESTAMP(3), we will cut it to be 1911-11-11 19:20:21.433 and write
+ * 31 39 31 31 2d 31 31 2d 31 31 20 31 39 3a 32 30 3a 32 31 2e 34 33 33.
+ * A null timestamp is padded with spaces.
+ *
+ * @param timestamp the timestamp
+ * @param byteNum the byte number the timestamp will write
+ * @throws IOException the io exception
+ */
+ public void writeTimestamp(TimestampWritableV2 timestamp, int byteNum) throws IOException {
+ if (timestamp == null) {
+ String pad = join("", Collections.nCopies(byteNum, " "));
+ write(pad.getBytes("UTF8"));
+ return;
+ }
+ String sTimeStamp = timestamp.getTimestamp().toString();
+ if (sTimeStamp.length() >= byteNum) {
+ write(sTimeStamp.substring(0, byteNum).getBytes("UTF8"));
+ return;
+ }
+ write(sTimeStamp.getBytes("UTF8"));
+ String pad;
+ if (sTimeStamp.length() == TIMESTAMP_NO_NANOS_BYTE_NUM) {
+ pad = "." + join("", Collections.nCopies(byteNum - sTimeStamp.length() - 1, "0"));
+ } else {
+ pad = join("", Collections.nCopies(byteNum - sTimeStamp.length(), "0"));
+ }
+ write(pad.getBytes("UTF8"));
+ }
+
+ /**
+ * Write DOUBLE.
+ * Using little-endian to write the double.
+ *
+ * @param d the double to write
+ * @throws IOException the io exception
+ */
+ public void writeDouble(double d) throws IOException {
+ EndianUtils.writeSwappedDouble(this, d);
+ }
+
+ /**
+ * Write DATE.
+ * The representation of date in Teradata binary format is:
+ * The date D is an int of 4 bytes using little endian.
+ * The representation is (YYYYMMDD - 19000000).toInt -> D,
+ * e.g. 1911.11.11 -> 19111111 -> 111111 -> 07 b2 01 00 in little endian.
+ * A null date is padded with 0.
+ *
+ * @param date the date
+ * @throws IOException the io exception
+ */
+ public void writeDate(DateWritableV2 date) throws IOException {
+ if (date == null) {
+ EndianUtils.writeSwappedInteger(this, 0);
+ return;
+ }
+ int toWrite = date.get().getYear() * 10000 + date.get().getMonth() * 100 + date.get().getDay() - 19000000;
+ EndianUtils.writeSwappedInteger(this, toWrite);
+ }
+
+ /**
+ * Write LONG.
+ * Using little-endian to write the long.
+ *
+ * @param l the long to write
+ * @throws IOException the io exception
+ */
+ public void writeLong(long l) throws IOException {
+ EndianUtils.writeSwappedLong(this, l);
+ }
+
+ /**
+ * Write CHAR(N).
+ * The representation of char in Teradata binary format is:
+ * the number of bytes to write is [charLength] * [bytePerChar] (totalLength);
+ * bytePerChar is decided by the charset: the LATIN charset is 2 bytes per char and the UNICODE charset is 3 bytes per char.
+ * A null char is padded with spaces.
+ *
+ * @param writable the writable
+ * @param length the total byte length N of the char field
+ * @throws IOException the io exception
+ */
+ public void writeChar(HiveCharWritable writable, int length) throws IOException {
+ if (writable == null) {
+ String pad = join("", Collections.nCopies(length, " "));
+ write(pad.getBytes("UTF8"));
+ return;
+ }
+ Text t = writable.getStrippedValue();
+ int contentLength = t.getLength();
+ write(t.getBytes(), 0, contentLength);
+ if (length - contentLength < 0) {
+ throw new IOException(format("The byte num %s of HiveCharWritable is more than the byte num %s we can hold. "
+ + "The content of HiveCharWritable is %s", contentLength, length, writable.getPaddedValue()));
+ }
+ if (length > contentLength) {
+ String pad = join("", Collections.nCopies(length - contentLength, " "));
+ write(pad.getBytes("UTF8"));
+ }
+ }
+
+ /**
+ * Write DECIMAL(P, S).
+ * The representation of decimal in Teradata binary format is:
+ * the number of bytes to write is decided solely by the precision (P);
+ * HiveDecimal is constructed through the byte array and scale.
+ * the remaining bytes are padded with 0x00 (positive) or 0xFF (negative).
+ * A null DECIMAL is padded with 0x00.
+ *
+ * @param writable the writable
+ * @param byteNum the byte num
+ * @param scale the scale
+ * @throws IOException the io exception
+ */
+ public void writeDecimal(HiveDecimalWritable writable, int byteNum, int scale) throws IOException {
+ if (writable == null) {
+ byte[] pad = new byte[byteNum];
+ write(pad);
+ return;
+ }
+ // since the HiveDecimal will auto adjust the scale to save resource
+ // we need to adjust it back otherwise the output bytes will be wrong
+ int hiveScale = writable.getHiveDecimal().scale();
+ BigInteger bigInteger = writable.getHiveDecimal().unscaledValue();
+ if (hiveScale < scale) {
+ BigInteger multiplicand = new BigInteger("1" + join("", Collections.nCopies(scale - hiveScale, "0")));
+ bigInteger = bigInteger.multiply(multiplicand);
+ }
+ byte[] content = bigInteger.toByteArray();
+ int signBit = content[0] >> 7 & 1;
+ ArrayUtils.reverse(content);
+ write(content);
+ if (byteNum > content.length) {
+ byte[] pad;
+ if (signBit == 0) {
+ pad = new byte[byteNum - content.length]; // pad non-negative values with 0x00
+ } else {
+ pad = new byte[byteNum - content.length];
+ Arrays.fill(pad, (byte) 255); // sign-extend negative values with 0xFF
+ }
+ write(pad);
+ }
+ }
+
+ /**
+ * Write SHORT.
+ * Using little-endian to write the short.
+ *
+ * @param s the short to write
+ * @throws IOException the io exception
+ */
+ public void writeShort(short s) throws IOException {
+ EndianUtils.writeSwappedShort(this, s);
+ }
+
+ /**
+ * Write VARBYTE(N).
+ * The representation of VARBYTE in Teradata binary format is:
+ * the first two bytes represent the length N of this varbyte field,
+ * the next N bytes represent the content of this varbyte field.
+ * A null varbyte is represented by length 0 and no content.
+ *
+ * @param writable the writable
+ * @throws IOException the io exception
+ */
+ public void writeVarByte(BytesWritable writable) throws IOException {
+ if (writable == null) {
+ EndianUtils.writeSwappedShort(this, (short) 0);
+ return;
+ }
+ int varbyteLength = writable.getLength();
+ EndianUtils.writeSwappedShort(this, (short) varbyteLength); // write the varbyte length
+ write(writable.getBytes(), 0, varbyteLength); // write the varbyte content
+ }
+}
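
The trickiest step in writeDecimal() above is rescaling the unscaled value and then
sign-extending the little-endian bytes. This sketch shows the same idea end to end;
it is not part of this patch, and BigDecimal.setScale stands in for the HiveDecimal
rescaling done by the serde:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.math.BigDecimal;
    import java.math.BigInteger;
    import java.util.Arrays;
    import org.apache.commons.lang.ArrayUtils;

    public class DecimalEncodeSketch {
      static byte[] encode(BigDecimal value, int scale, int byteNum) throws IOException {
        BigInteger unscaled = value.setScale(scale).unscaledValue(); // e.g. 3.14 -> 314 for scale 2
        byte[] content = unscaled.toByteArray();                     // big-endian two's complement
        int signBit = content[0] >> 7 & 1;                           // sign of the most significant byte
        ArrayUtils.reverse(content);                                 // convert to little-endian
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        out.write(content);
        byte[] pad = new byte[byteNum - content.length];             // zero-filled for non-negatives
        if (signBit == 1) {
          Arrays.fill(pad, (byte) 0xFF);                             // sign-extend negative values
        }
        out.write(pad);
        return out.toByteArray();
      }
      // encode(new BigDecimal("3.14"), 2, 8) -> 3a 01 00 00 00 00 00 00 (314, little-endian).
    }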