You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2008/11/11 02:50:18 UTC

svn commit: r712905 [38/38] - in /hadoop/core/trunk: ./ src/contrib/hive/ src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/ src/contrib/hive/common/src/java/org/apache/hadoop/hive/conf/ src/contrib/hive/conf/ src/contrib/hive/data/files/ src/co...

Modified: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java?rev=712905&r1=712904&r2=712905&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java Mon Nov 10 17:50:06 2008
@@ -28,6 +28,7 @@
 import com.facebook.thrift.protocol.*;
 import java.util.*;
 import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 import java.io.*;
 import org.apache.hadoop.conf.Configuration;
 import java.util.Properties;
@@ -39,7 +40,8 @@
  * This is not thrift compliant in that it doesn't write out field ids
  * so things cannot actually be versioned.
  */
-public class TCTLSeparatedProtocol extends TProtocol implements ConfigurableTProtocol {
+public class TCTLSeparatedProtocol extends TProtocol 
+  implements ConfigurableTProtocol, WriteNullsProtocol, SkippableTProtocol {
 
   final static Log LOG = LogFactory.getLog(TCTLSeparatedProtocol.class.getName());
   
@@ -57,29 +59,36 @@
   /**
    * These are defaults, but for now leaving them like this
    */
-  final static protected byte defaultPrimarySeparatorByte = 1;
-  final static protected byte defaultSecondarySeparatorByte = 2;
-  final static protected byte defaultRowSeparatorByte = (byte)'\n';
-  final static protected byte defaultMapSeparatorByte = 3;
+  final static protected String defaultPrimarySeparator = "\001";
+  final static protected String defaultSecondarySeparator = "\002";
+  final static protected String defaultRowSeparator = "\n";
+  final static protected String defaultMapSeparator = "\003";
 
   /**
    * The separators for this instance
    */
-  protected byte primarySeparatorByte;
-  protected byte secondarySeparatorByte;
-  protected byte rowSeparatorByte;
-  protected byte mapSeparatorByte;
+  protected String primarySeparator;
+  protected String secondarySeparator;
+  protected String rowSeparator;
+  protected String mapSeparator;
   protected Pattern primaryPattern;
   protected Pattern secondaryPattern;
   protected Pattern mapPattern;
 
   /**
+   * The quote character, when quoting is enabled, so that quoted entries are not split on separators (as in CSV).
+   * Note that escaping the quote is not currently supported.
+   */
+  protected String quote;
+  
+
+  /**
    * Inspect the separators this instance is configured with.
    */
-  public byte getPrimarySeparator() { return primarySeparatorByte; }
-  public byte getSecondarySeparator() { return secondarySeparatorByte; }
-  public byte getRowSeparator() { return rowSeparatorByte; }
-  public byte getMapSeparator() { return mapSeparatorByte; }
+  public String getPrimarySeparator() { return primarySeparator; }
+  public String getSecondarySeparator() { return secondarySeparator; }
+  public String getRowSeparator() { return rowSeparator; }
+  public String getMapSeparator() { return mapSeparator; }
 
 
   /**
@@ -164,6 +173,17 @@
   protected int bufferSize;
 
   /**
+   * The string representing nulls in the serialized data. e.g., \N as in mysql
+   */
+  protected String nullString;
+
+  /**
+   * The nullString in bytes
+   */ 
+  protected byte nullBuf[];
+
+
+  /**
    * A convenience class for tokenizing a TTransport
    */
 
@@ -174,12 +194,12 @@
     final String separator;
     byte buf[];
 
-    public SimpleTransportTokenizer(TTransport trans, byte separator, int buffer_length) {
+    public SimpleTransportTokenizer(TTransport trans, String separator, int buffer_length) {
       this.trans = trans;
-      byte [] separators = new byte[1];
-      separators[0] = separator;
-      this.separator = new String(separators);
+      this.separator = separator;
       buf = new byte[buffer_length];
+      // do not fill tokenizer until user requests since filling it could read in data
+      // not meant for this instantiation.
       fillTokenizer();
     }
 
@@ -191,7 +211,7 @@
             return false;
           }
           String row = new String(buf, 0, length);
-          tokenizer = new StringTokenizer(row, new String(separator), true);
+          tokenizer = new StringTokenizer(row, separator, true);
         } catch(TTransportException e) {
           e.printStackTrace();
           tokenizer = null;
@@ -204,6 +224,10 @@
       StringBuffer ret = null;
       boolean done = false;
 
+      if(tokenizer == null) {
+        fillTokenizer();
+      }
+
       while(! done) {
 
         if(! tokenizer.hasMoreTokens()) {
@@ -211,7 +235,6 @@
             break;
           }
         }
-
         try {
           final String nextToken = tokenizer.nextToken();
 
@@ -229,7 +252,8 @@
           done = true;
         }
       } // while ! done
-      return ret == null ? null : ret.toString();
+      final String theRet = ret == null ? null : ret.toString();
+      return theRet;
     }
   };
 
@@ -242,23 +266,23 @@
    */
 
   public TCTLSeparatedProtocol(TTransport trans) {
-    this(trans, defaultPrimarySeparatorByte, defaultSecondarySeparatorByte, defaultMapSeparatorByte, defaultRowSeparatorByte, false, 4096);
+    this(trans, defaultPrimarySeparator, defaultSecondarySeparator, defaultMapSeparator, defaultRowSeparator, false, 4096);
   }
 
   public TCTLSeparatedProtocol(TTransport trans, int buffer_size) {
-    this(trans, defaultPrimarySeparatorByte, defaultSecondarySeparatorByte, defaultMapSeparatorByte, defaultRowSeparatorByte, false, buffer_size);
+    this(trans, defaultPrimarySeparator, defaultSecondarySeparator, defaultMapSeparator, defaultRowSeparator, false, buffer_size);
   }
 
   /**
    * @param trans - the ttransport to use as input or output
-   * @param primarySeparatorByte the separator between columns (aka fields)
-   * @param secondarySeparatorByte the separator within a field for things like sets and maps and lists
-   * @param mapSeparatorByte - the key/value separator
-   * @param rowSeparatorByte - the record separator
+   * @param primarySeparator the separator between columns (aka fields)
+   * @param secondarySeparator the separator within a field for things like sets and maps and lists
+   * @param mapSeparator - the key/value separator
+   * @param rowSeparator - the record separator
    * @param returnNulls - whether to return a null or an empty string for fields that seem empty (ie two primary separators back to back)
    */
 
-  public TCTLSeparatedProtocol(TTransport trans, byte primarySeparatorByte, byte secondarySeparatorByte, byte mapSeparatorByte, byte rowSeparatorByte, 
+  public TCTLSeparatedProtocol(TTransport trans, String primarySeparator, String secondarySeparator, String mapSeparator, String rowSeparator, 
                                boolean returnNulls,
                                int bufferSize) {
     super(trans);
@@ -266,34 +290,103 @@
     returnNulls = returnNulls;
 
 
-    this.primarySeparatorByte = primarySeparatorByte;
-    this.secondarySeparatorByte = secondarySeparatorByte;
-    this.rowSeparatorByte = rowSeparatorByte;
-    this.mapSeparatorByte = mapSeparatorByte;
+    this.primarySeparator = primarySeparator;
+    this.secondarySeparator = secondarySeparator;
+    this.rowSeparator = rowSeparator;
+    this.mapSeparator = mapSeparator;
 
     this.innerTransport = trans;
     this.bufferSize = bufferSize;
+    this.nullString  = "\\N";
 
     internalInitialize();
   }
 
 
+
   /**
    * Sets the internal separator patterns and creates the internal tokenizer.
    */
   protected void internalInitialize() {
-    byte []primarySeparator = new byte[1];
-    byte []secondarySeparator = new byte[1];
-    primarySeparator[0] = primarySeparatorByte;
-    secondarySeparator[0] = secondarySeparatorByte;
-
-    primaryPattern = Pattern.compile(new String(primarySeparator));
-    secondaryPattern = Pattern.compile(new String(secondarySeparator));
-    mapPattern = Pattern.compile("\\0" + secondarySeparatorByte + "|\\0" + mapSeparatorByte);
 
-    transportTokenizer = new SimpleTransportTokenizer(innerTransport, rowSeparatorByte, bufferSize);
+    // in the future could allow users to specify a quote character that doesn't need escaping but for now ...
+    final String primaryPatternString = 
+      quote == null ? primarySeparator : 
+      "(?:^|" + primarySeparator + ")(" + quote + "(?:[^"  + quote + "]+|" + quote + quote + ")*" + quote + "|[^" + primarySeparator + "]*)";
+
+    if (quote != null) {
+      stripSeparatorPrefix = Pattern.compile("^" + primarySeparator);
+      stripQuotePrefix = Pattern.compile("^" + quote);
+      stripQuotePostfix = Pattern.compile(quote + "$");
+    }      
+
+    primaryPattern = Pattern.compile(primaryPatternString);
+    secondaryPattern = Pattern.compile(secondarySeparator);
+    mapPattern = Pattern.compile(secondarySeparator + "|" + mapSeparator);
+    nullBuf = nullString.getBytes();
+    transportTokenizer = new SimpleTransportTokenizer(innerTransport, rowSeparator, bufferSize);
+  }
+
+  /**
+   * For quoted fields: patterns to strip away the quotes, plus one to strip away the leading
+   * separator that the complex split method defined here leaves on each match.
+   */
+  protected Pattern stripSeparatorPrefix;
+  protected Pattern stripQuotePrefix;
+  protected Pattern stripQuotePostfix;
+
+
+  /** 
+   *
+   * Split the line based on a complex regex pattern
+   *
+   * @param line the current row
+   * @param p the pattern for matching fields in the row
+   * @return array of Strings - not including the separator in them
+   */
+  protected String[] complexSplit(String line, Pattern p) {
+
+    ArrayList<String> list = new ArrayList<String>();
+    Matcher m = p.matcher(line);
+    // For each field
+    while (m.find()) {
+      String match = m.group();
+      if (match == null)
+        break;
+      if (match.length() == 0)
+        match = null;
+      else {
+        if(stripSeparatorPrefix.matcher(match).find()) {
+          match = match.substring(1);
+        }
+        if(stripQuotePrefix.matcher(match).find()) {
+          match = match.substring(1);
+        }
+        if(stripQuotePostfix.matcher(match).find()) {
+          match = match.substring(0,match.length() - 1);
+        }
+      }
+      list.add(match);
+    }
+    return (String[])list.toArray(new String[1]);
   }
 
+
+
+  protected String getByteValue(String altValue, String defaultVal) {
+    if (altValue != null && altValue.length() > 0) {
+      try {
+        byte b [] = new byte[1];
+        b[0] = Byte.valueOf(altValue).byteValue();
+        return new String(b);
+      } catch(NumberFormatException e) {
+        return altValue;
+      }
+    }
+    return defaultVal;
+  }
+  
+
   /**
    * Initialize the TProtocol
    * @param conf System properties
@@ -301,13 +394,16 @@
    * @throws TException
    */
   public void initialize(Configuration conf, Properties tbl) throws TException {
-    primarySeparatorByte = Byte.valueOf(tbl.getProperty(Constants.FIELD_DELIM, String.valueOf(primarySeparatorByte))).byteValue();
-    LOG.debug("collections delim=<" + tbl.getProperty(Constants.COLLECTION_DELIM) + ">" );
-    secondarySeparatorByte = Byte.valueOf(tbl.getProperty(Constants.COLLECTION_DELIM, String.valueOf(secondarySeparatorByte))).byteValue();
-    rowSeparatorByte = Byte.valueOf(tbl.getProperty(Constants.LINE_DELIM, String.valueOf(rowSeparatorByte))).byteValue();
-    mapSeparatorByte = Byte.valueOf(tbl.getProperty(Constants.MAPKEY_DELIM, String.valueOf(mapSeparatorByte))).byteValue();
+
+
+    primarySeparator = getByteValue(tbl.getProperty(Constants.FIELD_DELIM), primarySeparator);
+    secondarySeparator = getByteValue(tbl.getProperty(Constants.COLLECTION_DELIM), secondarySeparator);
+    rowSeparator = getByteValue(tbl.getProperty(Constants.LINE_DELIM), rowSeparator);
+    mapSeparator = getByteValue(tbl.getProperty(Constants.MAPKEY_DELIM), mapSeparator);
     returnNulls = Boolean.valueOf(tbl.getProperty(ReturnNullsKey, String.valueOf(returnNulls))).booleanValue();
     bufferSize =  Integer.valueOf(tbl.getProperty(BufferSizeKey, String.valueOf(bufferSize))).intValue();
+    nullString  = tbl.getProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N");
+    quote  = tbl.getProperty(Constants.QUOTE_CHAR, null);
 
     internalInitialize();
 
@@ -329,7 +425,7 @@
 
   public void writeFieldBegin(TField field) throws TException {
     if(! firstField) {
-      writeByte(primarySeparatorByte);
+      internalWriteString(primarySeparator);
     }
     firstField = false;
   }
@@ -424,21 +520,34 @@
     writeString(String.valueOf(dub));
   }
 
+  public void internalWriteString(String str) throws TException {
+    if(str != null) {
+      final byte buf[] = str.getBytes();
+      trans_.write(buf, 0, buf.length);
+    } else {
+      trans_.write(nullBuf, 0, nullBuf.length);
+    }
+  }
+
   public void writeString(String str) throws TException {
     if(inner) {
       if(!firstInnerField) {
         // super hack city notice the mod plus only happens after firstfield hit, so == 0 is right.
         if(isMap && elemIndex++ % 2 == 0) {
-          writeByte(mapSeparatorByte);
+          internalWriteString(mapSeparator);
         } else {
-          writeByte(secondarySeparatorByte);
+          internalWriteString(secondarySeparator);
         }
       } else {
         firstInnerField = false;
       }
     }
-    final byte buf[] = str.getBytes();
-    trans_.write(buf, 0, buf.length);
+    if(str != null) {
+      final byte buf[] = str.getBytes();
+      trans_.write(buf, 0, buf.length);
+    } else {
+      trans_.write(nullBuf, 0, nullBuf.length);
+    }
   }
 
   public void writeBinary(byte[] bin) throws TException {
@@ -456,7 +565,7 @@
    assert(!inner);
    try {
      final String tmp = transportTokenizer.nextToken();
-     columns = primaryPattern.split(tmp);
+     columns = quote == null ? primaryPattern.split(tmp) : complexSplit(tmp, primaryPattern);
      index = 0;
      return new TStruct();
    } catch(EOFException e) {
@@ -468,6 +577,20 @@
     columns = null;
   }
 
+
+  /**
+   * Skip past the current field
+   * Just increments the field index counter.
+   */
+  public void skip(byte type) {
+    if( inner) {
+      innerIndex++;
+    } else {
+      index++;
+    }
+  }
+
+
   public TField readFieldBegin() throws TException {
     assert( !inner);
     TField f = new TField();
@@ -483,11 +606,19 @@
   public TMap readMapBegin() throws TException {
     assert( !inner);
     TMap map = new TMap();
-    fields = mapPattern.split(columns[index++]);
-    if(fields != null) {
-      map.size = fields.length/2;
-    } else {
+    if(columns[index] == null ||
+       columns[index].equals(nullString)) {
+      index++;
+      if(returnNulls) {
+        return null;
+      }
+      map.size = 0;
+    } else if(columns[index].isEmpty()) {
       map.size = 0;
+      index++;
+    } else {
+      fields = mapPattern.split(columns[index++]);
+      map.size = fields.length/2;
     }
     innerIndex = 0;
     inner = true;
@@ -503,11 +634,19 @@
   public TList readListBegin() throws TException {
     assert( !inner);
     TList list = new TList();
-    fields = secondaryPattern.split(columns[index++]);
-    if(fields != null) {
-      list.size = fields.length ;
-    } else {
+    if(columns[index] == null ||
+       columns[index].equals(nullString)) {
+      index++;
+      if(returnNulls) {
+        return null;
+      }
+     list.size = 0;
+    } else if(columns[index].isEmpty()) {
       list.size = 0;
+      index++;
+    } else {
+      fields = secondaryPattern.split(columns[index++]);
+      list.size = fields.length ;
     }
     innerIndex = 0;
     inner = true;
@@ -521,53 +660,88 @@
   public TSet readSetBegin() throws TException {
     assert( !inner);
     TSet set = new TSet();
-    fields = secondaryPattern.split(columns[index++]);
-    if(fields != null) {
-      set.size = fields.length ;
-    } else {
+    if(columns[index] == null ||
+       columns[index].equals(nullString)) {
+      index++;
+      if(returnNulls) {
+        return null;
+      }
       set.size = 0;
+    } else if(columns[index].isEmpty()) {
+      set.size = 0;
+      index++;
+    } else {
+      fields = secondaryPattern.split(columns[index++]);
+      set.size = fields.length ;
     }
     inner = true;
     innerIndex = 0;
     return set;
   }
 
+  protected boolean lastPrimitiveWasNullFlag;
+
+  public boolean lastPrimitiveWasNull() throws TException {
+    return lastPrimitiveWasNullFlag;
+  }
+
+  public void writeNull() throws TException {
+    writeString(null);
+  }
+
   public void readSetEnd() throws TException {
     inner = false;
   }
+
   public boolean readBool() throws TException {
-    return Boolean.valueOf(readString()).booleanValue();
+    String val = readString();
+    lastPrimitiveWasNullFlag = val == null;
+    return val == null || val.isEmpty() ? false : Boolean.valueOf(val).booleanValue();
   }
 
   public byte readByte() throws TException {
-    return Byte.valueOf(readString()).byteValue();
+    String val = readString();
+    lastPrimitiveWasNullFlag = val == null;
+    return  val == null || val.isEmpty() ? 0 : Byte.valueOf(val).byteValue();
   }
 
   public short readI16() throws TException {
-    return Short.valueOf(readString()).shortValue();
+    String val = readString();
+    lastPrimitiveWasNullFlag = val == null;
+    return val == null || val.isEmpty() ? 0 : Short.valueOf(val).shortValue();
   }
 
   public int readI32() throws TException {
-    return Integer.valueOf(readString()).intValue();
+    String val = readString();
+    lastPrimitiveWasNullFlag = val == null;
+    return val == null || val.isEmpty() ? 0 : Integer.valueOf(val).intValue();
   }
 
   public long readI64() throws TException {
-    return Long.valueOf(readString()).longValue();
+    String val = readString();
+    lastPrimitiveWasNullFlag = val == null;
+    return val == null || val.isEmpty() ? 0 : Long.valueOf(val).longValue();
   }
 
   public double readDouble() throws TException {
-    return Double.valueOf(readString()).doubleValue();
+    String val = readString();
+    lastPrimitiveWasNullFlag = val == null;
+    return val == null || val.isEmpty() ? 0 :Double.valueOf(val).doubleValue();
   }
 
-  protected String [] curMapPair;
   public String readString() throws TException {
     String ret;
     if(!inner) {
-      ret = columns != null && index < columns.length ? columns[index++] : null;
+      ret = columns != null && index < columns.length ? columns[index] :  null;
+      index++;
     } else {
-      ret = fields != null && innerIndex < fields.length ? fields[innerIndex++] : null;
+      ret = fields != null && innerIndex < fields.length ? fields[innerIndex] : null;
+      innerIndex++;
     }
-    return ret == null && ! returnNulls ? "" : ret;
+    if(ret == null || ret.equals(nullString))
+      return returnNulls ? null : "";
+    else
+      return ret;
   }
 
   public byte[] readBinary() throws TException {

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java?rev=712905&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java Mon Nov 10 17:50:06 2008
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.thrift;
+
+import com.facebook.thrift.TException;
+
+/**
+ * An interface for TProtocols that actually write out nulls - 
+ * This should be for all those that don't actually use
+ * fieldids in the written data like TCTLSeparatedProtocol.
+ * 
+ */
+public interface WriteNullsProtocol {
+  /**
+   * Was the last primitive read really a NULL? Need
+   * only be called when the value of the primitive
+   * was 0, i.e. the protocol should return 0 on nulls
+   * and the caller will then check if it was actually null.
+   * For boolean this is false.
+   */
+  public boolean lastPrimitiveWasNull() throws TException;
+
+  /**
+   * Write a null 
+   */
+  public void writeNull() throws TException;
+
+}

Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java?rev=712905&r1=712904&r2=712905&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java Mon Nov 10 17:50:06 2008
@@ -18,6 +18,9 @@
 
 package org.apache.hadoop.hive.serde2;
 
+
+import org.apache.hadoop.hive.serde.Constants;
+
 import junit.framework.TestCase;
 import java.io.*;
 import org.apache.hadoop.hive.serde2.*;
@@ -27,6 +30,7 @@
 import com.facebook.thrift.transport.*;
 import com.facebook.thrift.*;
 import com.facebook.thrift.protocol.*;
+import org.apache.hadoop.conf.Configuration;
 
 public class TestTCTLSeparatedProtocol extends TestCase {
 
@@ -71,7 +75,7 @@
 
 
     // use 3 as the row buffer size to force lots of re-buffering.
-    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 3);
+    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 1024);
 
     prot.readStructBegin();
 
@@ -116,7 +120,7 @@
   public void testWrites() throws Exception {
     try {
     TMemoryBuffer trans = new TMemoryBuffer(1024);
-    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 3);
+    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 1024);
 
     prot.writeStructBegin(new TStruct());
     prot.writeFieldBegin(new TField());
@@ -164,8 +168,6 @@
     String test = new String(b, 0, len);
 
     String testRef = "100348.55234.22hello world!key1val1key2val2key3val3elem1elem2bye!";
-    //    System.err.println("test=" + test + ">");
-    //    System.err.println(" ref=" + testRef + ">");
     assertTrue(test.equals(testRef));
 
     trans = new TMemoryBuffer(1023);
@@ -242,4 +244,263 @@
     }
   }
 
+  public void testQuotedWrites() throws Exception {
+    try {
+    TMemoryBuffer trans = new TMemoryBuffer(4096);
+    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 4096);
+    Properties schema = new Properties();
+    schema.setProperty(Constants.QUOTE_CHAR, "\"");
+    schema.setProperty(Constants.FIELD_DELIM, ",");
+    prot.initialize(new Configuration(), schema);
+
+    String testStr = "\"hello, world!\"";
+
+    prot.writeStructBegin(new TStruct());
+
+    prot.writeFieldBegin(new TField());
+    prot.writeString(testStr);
+    prot.writeFieldEnd();
+
+    prot.writeFieldBegin(new TField());
+    prot.writeListBegin(new TList());
+    prot.writeString("elem1");
+    prot.writeString("elem2");
+    prot.writeListEnd();
+    prot.writeFieldEnd();
+
+    prot.writeStructEnd();
+    prot.writeString("\n");
+
+    trans.flush();
+
+    byte b[] = new byte[4096];
+    int len = trans.read(b,0,b.length);
+
+
+    trans = new TMemoryBuffer(4096);
+    trans.write(b,0,len);
+    prot = new TCTLSeparatedProtocol(trans, 1024);
+    prot.initialize(new Configuration(), schema);
+
+    prot.readStructBegin();
+    prot.readFieldBegin();
+    final String firstRead = prot.readString();
+    prot.readFieldEnd();
+
+    testStr = testStr.replace("\"","");
+
+    assertEquals(testStr, firstRead);
+
+
+    // the 2 element list
+    prot.readFieldBegin();
+    TList l = prot.readListBegin();
+    assertTrue(l.size == 2);
+    assertTrue(prot.readString().equals("elem1"));
+    assertTrue(prot.readString().equals("elem2"));
+    prot.readListEnd();
+    prot.readFieldEnd();
+
+    // should return nulls at end
+    prot.readFieldBegin();
+    assertTrue(prot.readString().equals(""));
+    prot.readFieldEnd();
+
+    // should return nulls at end
+    prot.readFieldBegin();
+    assertTrue(prot.readString().equals(""));
+    prot.readFieldEnd();
+
+    prot.readStructEnd();
+
+
+    } catch(Exception e) {
+      e.printStackTrace();
+    }
+  }
+
+
+  /**
+   * Tests a sample apache log format. This is actually better done in general with a more TRegexLike protocol, but for this
+   * case, TCTLSeparatedProtocol can do it. 
+   */
+  public void test1ApacheLogFormat() throws Exception {
+    try {
+      final String sample = "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326";
+
+      TMemoryBuffer trans = new TMemoryBuffer(4096);
+      trans.write(sample.getBytes(), 0, sample.getBytes().length);
+      trans.flush();
+
+      TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 4096);
+      Properties schema = new Properties();
+
+      // this is a hacky way of doing the quotes since it will match any 2 of these, so
+      // "[ hello this is something to split [" would be considered to be quoted.
+      schema.setProperty(Constants.QUOTE_CHAR, "(\"|\\[|\\])");
+
+      schema.setProperty(Constants.FIELD_DELIM, " ");
+      schema.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "-");
+      prot.initialize(new Configuration(), schema);
+
+      prot.readStructBegin();
+
+      // ip address
+      prot.readFieldBegin();
+      final String ip = prot.readString();
+      prot.readFieldEnd();
+
+      assertEquals("127.0.0.1", ip);
+
+      //  identd
+      prot.readFieldBegin();
+      final String identd = prot.readString();
+      prot.readFieldEnd();
+
+      assertEquals("", identd);
+
+      //  user
+      prot.readFieldBegin();
+      final String user = prot.readString();
+      prot.readFieldEnd();
+
+      assertEquals("frank",user);
+
+      //  finishTime
+      prot.readFieldBegin();
+      final String finishTime = prot.readString();
+      prot.readFieldEnd();
+
+      assertEquals("10/Oct/2000:13:55:36 -0700",finishTime);
+
+      //  requestLine
+      prot.readFieldBegin();
+      final String requestLine = prot.readString();
+      prot.readFieldEnd();
+
+      assertEquals("GET /apache_pb.gif HTTP/1.0",requestLine);
+
+      //  returncode
+      prot.readFieldBegin();
+      final int returnCode = prot.readI32();
+      prot.readFieldEnd();
+
+      assertEquals(200, returnCode);
+
+      //  return size
+      prot.readFieldBegin();
+      final int returnSize = prot.readI32();
+      prot.readFieldEnd();
+
+      assertEquals(2326, returnSize);
+
+      prot.readStructEnd();
+
+    } catch(Exception e) {
+      e.printStackTrace();
+    }
+  }
+
+
+
+  public void testNulls() throws Exception {
+    try {
+    TMemoryBuffer trans = new TMemoryBuffer(1024);
+    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 10);
+
+    prot.writeStructBegin(new TStruct());
+
+    prot.writeFieldBegin(new TField());
+    prot.writeString(null);
+    prot.writeFieldEnd();
+
+    prot.writeFieldBegin(new TField());
+    prot.writeString(null);
+    prot.writeFieldEnd();
+
+    prot.writeFieldBegin(new TField());
+    prot.writeI32(100);
+    prot.writeFieldEnd();
+
+    prot.writeFieldBegin(new TField());
+    prot.writeString(null);
+    prot.writeFieldEnd();
+
+    prot.writeFieldBegin(new TField());
+    prot.writeMapBegin(new TMap());
+    prot.writeString(null);
+    prot.writeString(null);
+    prot.writeString("key2");
+    prot.writeString(null);
+    prot.writeString(null);
+    prot.writeString("val3");
+    prot.writeMapEnd();
+    prot.writeFieldEnd();
+
+    prot.writeStructEnd();
+
+    byte b[] = new byte[3*1024];
+    int len = trans.read(b,0,b.length);
+    String written = new String(b,0,len);
+
+    String testRef = "\\N\\N100\\N\\N\\Nkey2\\N\\Nval3";
+
+    assertTrue(testRef.equals(written));
+
+    trans = new TMemoryBuffer(1023);
+    trans.write(b, 0, len);
+
+    prot = new TCTLSeparatedProtocol(trans, 3);
+    
+    prot.readStructBegin();
+
+    prot.readFieldBegin();
+    String ret = prot.readString();
+    prot.readFieldEnd();
+
+    assertTrue(ret.equals(""));
+
+    prot.readFieldBegin();
+    ret = prot.readString();
+    prot.readFieldEnd();
+    
+    assertTrue(ret.equals(""));
+
+    prot.readFieldBegin();
+    int ret1 = prot.readI32();
+    prot.readFieldEnd();
+
+    assertTrue(ret1 == 100);
+
+
+    prot.readFieldBegin();
+    ret1 = prot.readI32();
+    prot.readFieldEnd();
+
+    prot.readFieldBegin();
+    TMap map =  prot.readMapBegin();
+
+    assertTrue(map.size == 3);
+
+    assertTrue(prot.readString().isEmpty());
+    assertTrue(prot.readString().isEmpty());
+
+    assertTrue(prot.readString().equals("key2"));
+    assertTrue(prot.readString().isEmpty());
+
+    assertTrue(prot.readString().isEmpty());
+    assertTrue(prot.readString().equals("val3"));
+    
+    prot.readMapEnd();
+    prot.readFieldEnd();
+
+    assertTrue(ret1 == 0);
+
+    } catch(Exception e) {
+      e.printStackTrace();
+    }
+  }
+
+
+
 }

Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java?rev=712905&r1=712904&r2=712905&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java Mon Nov 10 17:50:06 2008
@@ -20,8 +20,11 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.Map;
 import java.util.List;
 import java.util.Properties;
+import java.util.Random;
+import java.util.Map.Entry;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde.Constants;
@@ -35,6 +38,13 @@
 
 public class TestDynamicSerDe extends TestCase {
 
+  /** Builds a map from alternating key/value arguments (k1, v1, k2, v2, ...); rejects an odd argument count. */
+  public static HashMap<String,String> makeHashMap(String... params) {
+    if (params.length % 2 != 0) throw new IllegalArgumentException("odd number of params: " + params.length);
+    HashMap<String,String> r = new HashMap<String,String>();
+    for (int i = 0; i < params.length; i += 2) r.put(params[i], params[i+1]);
+    return r;
+  }
   public void testDynamicSerDe() throws Throwable {
     try {
 
@@ -49,24 +59,41 @@
       struct.add(Integer.valueOf(234));
       struct.add(bye);
       struct.add(another);
+      struct.add(Integer.valueOf(-234));
+      struct.add(Double.valueOf(1.0));
+      struct.add(Double.valueOf(-2.5));
+      
       
       // All protocols
       ArrayList<String> protocols = new ArrayList<String>();
       ArrayList<Boolean> isBinaries = new ArrayList<Boolean>();
-      
+      ArrayList<HashMap<String,String>> additionalParams = new ArrayList<HashMap<String,String>>();
+
+      protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
+      isBinaries.add(true);
+      additionalParams.add(makeHashMap("serialization.sort.order", "++++++"));
+      protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
+      isBinaries.add(true);
+      additionalParams.add(makeHashMap("serialization.sort.order", "------"));
+
+
       protocols.add(com.facebook.thrift.protocol.TBinaryProtocol.class.getName());
       isBinaries.add(true);
+      additionalParams.add(null);
       
       protocols.add(com.facebook.thrift.protocol.TJSONProtocol.class.getName());
       isBinaries.add(false);
+      additionalParams.add(null);
 
       // TSimpleJSONProtocol does not support deserialization.
       // protocols.add(com.facebook.thrift.protocol.TSimpleJSONProtocol.class.getName());
       // isBinaries.add(false);
+      // additionalParams.add(null);
       
       // TCTLSeparatedProtocol is not done yet.
       protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
       isBinaries.add(false);
+      additionalParams.add(null);
       
       System.out.println("input struct = " + struct);
         
@@ -80,8 +107,14 @@
         schema.setProperty(Constants.SERIALIZATION_FORMAT, protocol);
         schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
         schema.setProperty(Constants.SERIALIZATION_DDL,
-            "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+        "struct test { i32 hello, list<string> bye, map<string,i32> another, i32 nhello, double d, double nd}");
         schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+        HashMap<String, String> p = additionalParams.get(pp);
+        if (p != null) {
+          for(Entry<String, String> e: p.entrySet()) {
+            schema.setProperty(e.getKey(), e.getValue());
+          }
+        }
   
         DynamicSerDe serde = new DynamicSerDe();
         serde.initialize(new Configuration(), schema);
@@ -93,15 +126,8 @@
         
         // Try to serialize
         BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
-        
-        StringBuilder sb = new StringBuilder();
-        for (int i=0; i<bytes.getSize(); i++) {
-          byte b = bytes.get()[i];
-          int v = (b<0 ? 256 + b : b);
-          sb.append(String.format("x%02x", v));
-        }
-        System.out.println("bytes =" + sb);
-        
+        System.out.println("bytes =" + hexString(bytes)); 
+       
         if (!isBinary) {
           System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()));
         }
@@ -116,7 +142,7 @@
         System.out.println("o[2] class = " + olist.get(2).getClass());
         System.out.println("o = " + o);
         
-        assertEquals(o, struct);
+        assertEquals(struct, o);
       }
       
     } catch (Throwable e) {
@@ -124,9 +150,216 @@
       throw e;
     }
     
+  }
+  /** Formats the first getSize() bytes of the buffer as text, each byte as "x" plus two hex digits. */
+  public String hexString(BytesWritable bytes) {
+    final byte[] raw = bytes.get();
+    final int length = bytes.getSize();
+    StringBuilder hex = new StringBuilder();
+    // Masking with 0xff maps the signed byte onto 0..255.
+    for (int pos = 0; pos < length; pos++) hex.append(String.format("x%02x", raw[pos] & 0xff));
+    return hex.toString();
+  }
+
+
+  /**
+   * Serializes every struct with TBinarySortableProtocol using an all-ascending or all-descending
+   * sort order, fails if the bytes of consecutive structs compare in the wrong direction, and then
+   * verifies that each serialized struct deserializes back to an equal object.
+   */
+  private void testTBinarySortableProtocol(Object[] structs, String ddl, boolean ascending) throws Throwable{
+    // One sort-order character per field; the field count is taken from the last struct.
+    final String direction = ascending ? "+" : "-";
+    final int fieldCount = ((List)structs[structs.length-1]).size();
+    StringBuilder sortOrder = new StringBuilder();
+    for (int f = 0; f < fieldCount; f++) sortOrder.append(direction);
+
+    Properties schema = new Properties();
+    schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
+    schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+    schema.setProperty(Constants.SERIALIZATION_DDL, ddl);
+    schema.setProperty(Constants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
+    schema.setProperty(Constants.SERIALIZATION_SORT_ORDER, sortOrder.toString());
+    DynamicSerDe serde = new DynamicSerDe();
+    serde.initialize(new Configuration(), schema);
+    ObjectInspector oi = serde.getObjectInspector();
+
+    // Serialize, comparing each result against the previous one as soon as it is available.
+    BytesWritable[] serialized = new BytesWritable[structs.length];
+    for (int cur = 0; cur < structs.length; cur++) {
+      serialized[cur] = new BytesWritable();
+      serialized[cur].set((BytesWritable)serde.serialize(structs[cur], oi));
+      if (cur == 0) continue;
+      int prev = cur - 1;
+      int cmp = serialized[prev].compareTo(serialized[cur]);
+      boolean reversed = ascending ? cmp > 0 : cmp < 0;
+      if (!reversed) continue;
+      System.out.println("Test failed in " + (ascending ? "ascending" : "descending") + " order.");
+      System.out.println("serialized data of " + structs[prev] + " = " + hexString(serialized[prev]));
+      System.out.println("serialized data of " + structs[cur] + " = " + hexString(serialized[cur]));
+      fail("Sort order of serialized " + structs[prev] + " and " + structs[cur] + " are reversed!");            
+    }
+
+    // Deserialize and check the round trip; details are printed only on a mismatch.
+    for (int cur = 0; cur < structs.length; cur++) {
+      Object roundTripped = serde.deserialize(serialized[cur]);
+      if (structs[cur].equals(roundTripped)) continue;
+      System.out.println("structs[i] = " + structs[cur]);
+      System.out.println("deserialized[i] = " + roundTripped);
+      System.out.println("serialized[i] = " + hexString(serialized[cur]));
+      assertEquals(structs[cur], roundTripped);
+    }
+  }
+  
+  /**
+   * Test ordering: null sorts first, lists compare element by element, two Longs compare exactly,
+   * other numbers compare as doubles, strings lexicographically; anything else counts as equal.
+   */
+  static int compare(Object a, Object b) {
+    if (a == null || b == null) return a == b ? 0 : (a == null ? -1 : 1);
+    if (a instanceof List) {
+      List la = (List)a, lb = (List)b;
+      assert(la.size() == lb.size());
+      for (int i=0; i<la.size(); i++) {
+        int r = compare(la.get(i), lb.get(i));
+        if (r != 0) return r;
+      }
+      return 0;
+    } else if (a instanceof Long && b instanceof Long) {
+      // doubleValue() cannot represent every long exactly, so compare the long values directly.
+      long x = ((Long)a).longValue(), y = ((Long)b).longValue();
+      return x < y ? -1 : (x > y ? 1 : 0);
+    } else if (a instanceof Number) {
+      double x = ((Number)a).doubleValue(), y = ((Number)b).doubleValue();
+      return x < y ? -1 : (x > y ? 1 : 0);
+    } else if (a instanceof String) {
+      return ((String)a).compareTo((String)b);
+    }
+    return 0;
+  }
+  
+  /** Sorts structs in place into ascending compare() order using pairwise exchanges. */
+  private void sort(Object[] structs) {
+    for (int lo = 0; lo < structs.length; lo++) for (int hi = lo + 1; hi < structs.length; hi++) {
+      if (compare(structs[lo], structs[hi]) <= 0) continue;
+      Object held = structs[lo];
+      structs[lo] = structs[hi]; structs[hi] = held;
+    }
   }  
 
+  public void testTBinarySortableProtocol() throws Throwable {
+    try {
 
+      System.out.println("Beginning Test testTBinarySortableProtocol:");
+      
+      int num = 100;
+      Random r = new Random(1234); // fixed seed: the same pseudo-random data on every run
+      Object structs[] = new Object[num];
+      String ddl;
+      
+      // Test double: a null in slot 0 before sorting, the rest random values (nextDouble()-0.5)*10.
+      for (int i=0; i<num; i++) {
+        ArrayList<Object> struct = new ArrayList<Object>();
+        if (i==0) {
+          struct.add(null);
+        } else {
+          struct.add(Double.valueOf((r.nextDouble()-0.5)*10));
+        }
+        structs[i] = struct;
+      }
+      sort(structs);
+      ddl = "struct test { double hello}";
+      System.out.println("Testing " + ddl);
+      testTBinarySortableProtocol(structs, ddl, true);
+      testTBinarySortableProtocol(structs, ddl, false);
+
+      // Test integer: a null plus random ints of magnitude up to about 0.75 * Integer.MAX_VALUE.
+      for (int i=0; i<num; i++) {
+        ArrayList<Object> struct = new ArrayList<Object>();
+        if (i==0) {
+          struct.add(null);
+        } else {
+          struct.add((int)((r.nextDouble()-0.5)*1.5*Integer.MAX_VALUE));
+        }
+        structs[i] = struct;
+      }
+      sort(structs);
+      // Null should be smaller than any other value, so put a null at the front end
+      // to test whether that is held.
+      ((List)structs[0]).set(0, null);
+      ddl = "struct test { i32 hello}";
+      System.out.println("Testing " + ddl);
+      testTBinarySortableProtocol(structs, ddl, true);
+      testTBinarySortableProtocol(structs, ddl, false);
+
+      // Test long: a null plus random longs of magnitude up to about 0.75 * Long.MAX_VALUE.
+      for (int i=0; i<num; i++) {
+        ArrayList<Object> struct = new ArrayList<Object>();
+        if (i==0) {
+          struct.add(null);
+        } else {
+          struct.add((long)((r.nextDouble()-0.5)*1.5*Long.MAX_VALUE));
+        }
+        structs[i] = struct;
+      }
+      sort(structs);
+      // Null should be smaller than any other value, so put a null at the front end
+      // to test whether that is held.
+      ((List)structs[0]).set(0, null);
+      ddl = "struct test { i64 hello}";
+      System.out.println("Testing " + ddl);
+      testTBinarySortableProtocol(structs, ddl, true);
+      testTBinarySortableProtocol(structs, ddl, false);
+
+      // Test string: a null plus the decimal text of random doubles; sort() orders them as strings.
+      for (int i=0; i<num; i++) {
+        ArrayList<Object> struct = new ArrayList<Object>();
+        if (i==0) {
+          struct.add(null);
+        } else {
+          struct.add(String.valueOf((r.nextDouble()-0.5)*1000));
+        }
+        structs[i] = struct;
+      }
+      sort(structs);
+      // Null should be smaller than any other value, so put a null at the front end
+      // to test whether that is held.
+      ((List)structs[0]).set(0, null);
+      ddl = "struct test { string hello}";
+      System.out.println("Testing " + ddl);
+      testTBinarySortableProtocol(structs, ddl, true);
+      testTBinarySortableProtocol(structs, ddl, false);
+
+      // Test string + double: the string is null when i%9==0, the double is null when i%7==0.
+      for (int i=0; i<num; i++) {
+        ArrayList<Object> struct = new ArrayList<Object>();
+        if (i%9==0) {
+          struct.add(null);
+        } else {
+          struct.add("str" + (i/5));
+        }
+        if (i%7==0) {
+          struct.add(null);
+        } else {
+          struct.add(Double.valueOf((r.nextDouble()-0.5)*10));
+        }
+        structs[i] = struct;
+      }
+      sort(structs);
+      // Null should be smaller than any other value, so put a null at the front end
+      // to test whether that is held.
+      ((List)structs[0]).set(0, null);
+      ddl = "struct test { string hello, double another}";
+      System.out.println("Testing " + ddl);
+      testTBinarySortableProtocol(structs, ddl, true);
+      testTBinarySortableProtocol(structs, ddl, false);
+      
+      System.out.println("Test testTBinarySortableProtocol passed!");
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
 
 
   public void testConfigurableTCTLSeparated() throws Throwable {
@@ -161,20 +394,14 @@
       serde.initialize(new Configuration(), schema);
 
       TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol)serde.oprot_;
-      assertTrue(prot.getPrimarySeparator() == 9);
+      assertTrue(prot.getPrimarySeparator().equals("\u0009"));
       
       ObjectInspector oi = serde.getObjectInspector();
 
       // Try to serialize
       BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
         
-      StringBuilder sb = new StringBuilder();
-      for (int i=0; i<bytes.getSize(); i++) {
-        byte b = bytes.get()[i];
-        int v = (b<0 ? 256 + b : b);
-        sb.append(String.format("x%02x", v));
-      }
-      System.out.println("bytes =" + sb);
+      hexString(bytes);
 
       String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
 
@@ -201,4 +428,356 @@
     }
     
   }  
+
+
+  /**
+   * Round-trips a struct whose list field is null through TCTLSeparatedProtocol with
+   * ReturnNullsKey set to "true" and expects the deserialized struct to equal the input.
+   */
+  public void testNulls1() throws Throwable {
+    try {
+
+
+      // The struct under test: i32 234, a null list and a two-entry map.
+      ArrayList<String> bye = null;
+      HashMap<String, Integer> another = new HashMap<String, Integer>();
+      another.put("firstKey", 1);
+      another.put("secondKey", 2);
+      ArrayList<Object> struct = new ArrayList<Object>();
+      struct.add(Integer.valueOf(234));
+      struct.add(bye);
+      struct.add(another);
+
+      Properties schema = new Properties();
+      schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+      schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+      schema.setProperty(Constants.SERIALIZATION_DDL,
+                         "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+      schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+      schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
+
+      DynamicSerDe serde = new DynamicSerDe();
+      serde.initialize(new Configuration(), schema);
+
+      ObjectInspector oi = serde.getObjectInspector();
+
+      // Try to serialize
+      BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+      System.out.println("bytes =" + hexString(bytes)); // print the dump instead of discarding it
+
+      // Try to deserialize
+      Object o = serde.deserialize(bytes);
+      assertEquals(struct, o);
+
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+
+  }
+
+  /**
+   * Round-trips a struct whose three fields (i32, list, map) are all null through
+   * TCTLSeparatedProtocol with ReturnNullsKey set to "true"; every field must come back null.
+   */
+  public void testNulls2() throws Throwable {
+    try {
+
+
+      // All three fields of the struct are null.
+      ArrayList<String> bye = null;
+      HashMap<String, Integer> another = null;
+      ArrayList<Object> struct = new ArrayList<Object>();
+      struct.add(null);
+      struct.add(bye);
+      struct.add(another);
+
+      Properties schema = new Properties();
+      schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+      schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+      schema.setProperty(Constants.SERIALIZATION_DDL,
+                         "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+      schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+      schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
+
+      DynamicSerDe serde = new DynamicSerDe();
+      serde.initialize(new Configuration(), schema);
+
+      ObjectInspector oi = serde.getObjectInspector();
+
+      // Try to serialize
+      BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+      System.out.println("bytes =" + hexString(bytes)); // print the dump instead of discarding it
+
+      // Try to deserialize
+      Object o = serde.deserialize(bytes);
+      List<?> olist = (List<?>)o;
+
+      assertEquals(3, olist.size());
+      assertNull(olist.get(0));
+      assertNull(olist.get(1));
+      assertNull(olist.get(2));
+
+      // Whole-struct assertEquals(struct, o) is not used because types of null lists are wrong.
+
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+
+  }
+
+  /**
+   * Round-trips a struct with a null i32, an empty list and a null map through TCTLSeparatedProtocol
+   * with ReturnNullsKey set to "true"; the list must come back empty and the other fields null.
+   */
+  public void testNulls3() throws Throwable {
+    try {
+
+
+      // Null i32, empty (non-null) list, null map.
+      ArrayList<String> bye = new ArrayList<String> ();
+      HashMap<String, Integer> another = null;
+      ArrayList<Object> struct = new ArrayList<Object>();
+      struct.add(null);
+      struct.add(bye);
+      struct.add(another);
+
+      Properties schema = new Properties();
+      schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+      schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+      schema.setProperty(Constants.SERIALIZATION_DDL,
+                         "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+      schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+
+      schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
+      DynamicSerDe serde = new DynamicSerDe();
+      serde.initialize(new Configuration(), schema);
+
+      ObjectInspector oi = serde.getObjectInspector();
+
+      // Try to serialize
+      BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+      System.out.println("bytes =" + hexString(bytes)); // print the dump instead of discarding it
+
+      // Try to deserialize
+      Object o = serde.deserialize(bytes);
+      List<?> olist = (List<?>)o;
+
+      assertEquals(3, olist.size());
+      assertNull(olist.get(0));
+      assertEquals(0, ((List<?>)olist.get(1)).size());
+      assertNull(olist.get(2));
+
+      // Whole-struct assertEquals(struct, o) is not used because types of null lists are wrong.
+
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+
+  }
+
+
+  /**
+   * Round-trips a struct with a null i32, an empty list and a null map through TCTLSeparatedProtocol
+   * with ReturnNullsKey set to "false"; expects 0, an empty list and an empty map instead of nulls.
+   */
+  public void testNulls4() throws Throwable {
+    try {
+
+
+      // Null i32, empty (non-null) list, null map.
+      ArrayList<String> bye = new ArrayList<String> ();
+      HashMap<String, Integer> another = null;
+      ArrayList<Object> struct = new ArrayList<Object>();
+      struct.add(null);
+      struct.add(bye);
+      struct.add(another);
+
+      Properties schema = new Properties();
+      schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+      schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+      schema.setProperty(Constants.SERIALIZATION_DDL,
+                         "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+      schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+
+      schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "false");
+      DynamicSerDe serde = new DynamicSerDe();
+      serde.initialize(new Configuration(), schema);
+
+      ObjectInspector oi = serde.getObjectInspector();
+
+      // Try to serialize
+      BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+      System.out.println("bytes =" + hexString(bytes)); // print the dump instead of discarding it
+
+      // Try to deserialize
+      Object o = serde.deserialize(bytes);
+      List<?> olist = (List<?>)o;
+
+      assertEquals(3, olist.size());
+      assertEquals(Integer.valueOf(0), (Integer)olist.get(0));
+      List<?> num1 = (List<?>)olist.get(1);
+      assertEquals(0, num1.size());
+      Map<?,?> num2 = (Map<?,?>)olist.get(2);
+      assertEquals(0, num2.size());
+
+      // assertEquals(struct, o) cannot hold here: the null i32 and null map come back as 0 and an empty map.
+
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+
+  }
+
+
+  /**
+   * Round-trips a struct that contains another struct (plus an i32, a list and a map) through
+   * TBinaryProtocol and checks every deserialized field against the value that was serialized.
+   */
+  public void testStructsinStructs() throws Throwable {
+    try {
+
+
+      Properties schema = new Properties();
+      //      schema.setProperty(Constants.SERIALIZATION_FORMAT, com.facebook.thrift.protocol.TJSONProtocol.class.getName());
+      schema.setProperty(Constants.SERIALIZATION_FORMAT, com.facebook.thrift.protocol.TBinaryProtocol.class.getName());
+      schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+      schema.setProperty(Constants.SERIALIZATION_DDL,
+                         "struct inner { i32 field1, string field2 },struct  test {inner foo,  i32 hello, list<string> bye, map<string,i32> another}");
+      schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+
+
+      //
+      // construct object of above type
+      //
+
+      // construct the inner struct (field1, field2)
+      ArrayList<Object> innerStruct = new ArrayList<Object>();
+      innerStruct.add(Integer.valueOf(22));
+      innerStruct.add("hello world");
+
+      // construct outer struct
+      ArrayList<String> bye = new ArrayList<String> ();
+      bye.add("firstString");
+      bye.add("secondString");
+      HashMap<String, Integer> another = new HashMap<String, Integer>();
+      another.put("firstKey", 1);
+      another.put("secondKey", 2);
+
+      ArrayList<Object> struct = new ArrayList<Object>();
+
+      struct.add(innerStruct);
+      struct.add(Integer.valueOf(234));
+      struct.add(bye);
+      struct.add(another);
+
+      DynamicSerDe serde = new DynamicSerDe();
+      serde.initialize(new Configuration(), schema);
+
+      ObjectInspector oi = serde.getObjectInspector();
+
+      // Try to serialize
+      BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+      // Try to deserialize
+      Object o = serde.deserialize(bytes);
+      List<?> olist = (List<?>)o;
+
+
+      assertEquals(4, olist.size());
+      assertEquals(innerStruct, olist.get(0));
+      assertEquals(Integer.valueOf(234), olist.get(1));
+      assertEquals(bye, olist.get(2));
+      assertEquals(another, olist.get(3));
+
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+
+  }
+
+
+
+
+  public void testSkip() throws Throwable {
+    try {
+      // Serializes with the full DDL, then re-initializes with "skip" on the list field and expects null for it.
+      // Try to construct an object
+      ArrayList<String> bye = new ArrayList<String>();
+      bye.add("firstString");
+      bye.add("secondString");
+      HashMap<String, Integer> another = new HashMap<String, Integer>();
+      another.put("firstKey", 1);
+      another.put("secondKey", 2);
+      ArrayList<Object> struct = new ArrayList<Object>();
+      struct.add(Integer.valueOf(234));
+      struct.add(bye);
+      struct.add(another);
+
+      Properties schema = new Properties();
+      schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+      schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+      schema.setProperty(Constants.SERIALIZATION_DDL,
+                         "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+      schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+
+      schema.setProperty(Constants.FIELD_DELIM, "9");
+      schema.setProperty(Constants.COLLECTION_DELIM, "1");
+      schema.setProperty(Constants.LINE_DELIM, "2");
+      schema.setProperty(Constants.MAPKEY_DELIM, "4");
+
+      DynamicSerDe serde = new DynamicSerDe();
+      serde.initialize(new Configuration(), schema);
+
+      TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol)serde.oprot_;
+      assertTrue(prot.getPrimarySeparator().equals("\u0009"));
+
+      ObjectInspector oi = serde.getObjectInspector();
+
+      // Try to serialize
+      BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+      System.out.println("bytes =" + hexString(bytes)); // print the dump instead of discarding it
+
+      String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
+
+      System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()) + ">");
+      System.out.println("compare to    =" + compare + ">");
+
+      assertEquals(compare, new String(bytes.get(), 0, bytes.getSize()));
+
+      schema.setProperty(Constants.SERIALIZATION_DDL,
+                         "struct test { i32 hello, skip list<string> bye, map<string,i32> another}");
+
+      serde.initialize(new Configuration(), schema);
+
+      // Try to deserialize
+      Object o = serde.deserialize(bytes);
+      System.out.println("o class = " + o.getClass());
+      List<?> olist = (List<?>)o;
+      System.out.println("o size = " + olist.size());
+      System.out.println("o = " + o);
+
+      assertNull(olist.get(1));
+
+      // set the skipped field to null in the expected struct as well
+      struct.set(1,null);
+
+      assertEquals(struct, o);
+
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+
+  }
+
 }

Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java?rev=712905&r1=712904&r2=712905&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java Mon Nov 10 17:50:06 2008
@@ -34,7 +34,7 @@
       assertEquals(oi1, oi2);
       assertEquals(Category.PRIMITIVE, oi1.getCategory());
       assertEquals(c, oi1.getPrimitiveClass());
-      assertEquals(ObjectInspectorUtils.getClassShortName(c.getName()),
+      assertEquals(ObjectInspectorUtils.getClassShortName(c),
           oi1.getTypeName()); 
     } catch (Throwable e) {
       e.printStackTrace();