You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2008/11/11 02:50:18 UTC
svn commit: r712905 [38/38] - in /hadoop/core/trunk: ./ src/contrib/hive/
src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/
src/contrib/hive/common/src/java/org/apache/hadoop/hive/conf/
src/contrib/hive/conf/ src/contrib/hive/data/files/ src/co...
Modified: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java?rev=712905&r1=712904&r2=712905&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java Mon Nov 10 17:50:06 2008
@@ -28,6 +28,7 @@
import com.facebook.thrift.protocol.*;
import java.util.*;
import java.util.regex.Pattern;
+import java.util.regex.Matcher;
import java.io.*;
import org.apache.hadoop.conf.Configuration;
import java.util.Properties;
@@ -39,7 +40,8 @@
* This is not thrift compliant in that it doesn't write out field ids
* so things cannot actually be versioned.
*/
-public class TCTLSeparatedProtocol extends TProtocol implements ConfigurableTProtocol {
+public class TCTLSeparatedProtocol extends TProtocol
+ implements ConfigurableTProtocol, WriteNullsProtocol, SkippableTProtocol {
final static Log LOG = LogFactory.getLog(TCTLSeparatedProtocol.class.getName());
@@ -57,29 +59,36 @@
/**
* These are defaults, but for now leaving them like this
*/
- final static protected byte defaultPrimarySeparatorByte = 1;
- final static protected byte defaultSecondarySeparatorByte = 2;
- final static protected byte defaultRowSeparatorByte = (byte)'\n';
- final static protected byte defaultMapSeparatorByte = 3;
+ final static protected String defaultPrimarySeparator = "\001";
+ final static protected String defaultSecondarySeparator = "\002";
+ final static protected String defaultRowSeparator = "\n";
+ final static protected String defaultMapSeparator = "\003";
/**
* The separators for this instance
*/
- protected byte primarySeparatorByte;
- protected byte secondarySeparatorByte;
- protected byte rowSeparatorByte;
- protected byte mapSeparatorByte;
+ protected String primarySeparator;
+ protected String secondarySeparator;
+ protected String rowSeparator;
+ protected String mapSeparator;
protected Pattern primaryPattern;
protected Pattern secondaryPattern;
protected Pattern mapPattern;
/**
+ * The quote character; when set, fields are not split on separators that appear inside quoted entries (as in CSV).
+ * Note that escaping the quote is not currently supported.
+ */
+ protected String quote;
+
+
+ /**
* Inspect the separators this instance is configured with.
*/
- public byte getPrimarySeparator() { return primarySeparatorByte; }
- public byte getSecondarySeparator() { return secondarySeparatorByte; }
- public byte getRowSeparator() { return rowSeparatorByte; }
- public byte getMapSeparator() { return mapSeparatorByte; }
+ public String getPrimarySeparator() { return primarySeparator; }
+ public String getSecondarySeparator() { return secondarySeparator; }
+ public String getRowSeparator() { return rowSeparator; }
+ public String getMapSeparator() { return mapSeparator; }
/**
@@ -164,6 +173,17 @@
protected int bufferSize;
/**
+ * The string representing nulls in the serialized data. e.g., \N as in mysql
+ */
+ protected String nullString;
+
+ /**
+ * The nullString in bytes
+ */
+ protected byte nullBuf[];
+
+
+ /**
* A convenience class for tokenizing a TTransport
*/
@@ -174,12 +194,12 @@
final String separator;
byte buf[];
- public SimpleTransportTokenizer(TTransport trans, byte separator, int buffer_length) {
+ public SimpleTransportTokenizer(TTransport trans, String separator, int buffer_length) {
this.trans = trans;
- byte [] separators = new byte[1];
- separators[0] = separator;
- this.separator = new String(separators);
+ this.separator = separator;
buf = new byte[buffer_length];
+ // do not fill tokenizer until user requests since filling it could read in data
+ // not meant for this instantiation.
fillTokenizer();
}
@@ -191,7 +211,7 @@
return false;
}
String row = new String(buf, 0, length);
- tokenizer = new StringTokenizer(row, new String(separator), true);
+ tokenizer = new StringTokenizer(row, separator, true);
} catch(TTransportException e) {
e.printStackTrace();
tokenizer = null;
@@ -204,6 +224,10 @@
StringBuffer ret = null;
boolean done = false;
+ if(tokenizer == null) {
+ fillTokenizer();
+ }
+
while(! done) {
if(! tokenizer.hasMoreTokens()) {
@@ -211,7 +235,6 @@
break;
}
}
-
try {
final String nextToken = tokenizer.nextToken();
@@ -229,7 +252,8 @@
done = true;
}
} // while ! done
- return ret == null ? null : ret.toString();
+ final String theRet = ret == null ? null : ret.toString();
+ return theRet;
}
};
@@ -242,23 +266,23 @@
*/
public TCTLSeparatedProtocol(TTransport trans) {
- this(trans, defaultPrimarySeparatorByte, defaultSecondarySeparatorByte, defaultMapSeparatorByte, defaultRowSeparatorByte, false, 4096);
+ this(trans, defaultPrimarySeparator, defaultSecondarySeparator, defaultMapSeparator, defaultRowSeparator, false, 4096);
}
public TCTLSeparatedProtocol(TTransport trans, int buffer_size) {
- this(trans, defaultPrimarySeparatorByte, defaultSecondarySeparatorByte, defaultMapSeparatorByte, defaultRowSeparatorByte, false, buffer_size);
+ this(trans, defaultPrimarySeparator, defaultSecondarySeparator, defaultMapSeparator, defaultRowSeparator, false, buffer_size);
}
/**
* @param trans - the ttransport to use as input or output
- * @param primarySeparatorByte the separator between columns (aka fields)
- * @param secondarySeparatorByte the separator within a field for things like sets and maps and lists
- * @param mapSeparatorByte - the key/value separator
- * @param rowSeparatorByte - the record separator
+ * @param primarySeparator the separator between columns (aka fields)
+ * @param secondarySeparator the separator within a field for things like sets and maps and lists
+ * @param mapSeparator - the key/value separator
+ * @param rowSeparator - the record separator
* @param returnNulls - whether to return a null or an empty string for fields that seem empty (ie two primary separators back to back)
*/
- public TCTLSeparatedProtocol(TTransport trans, byte primarySeparatorByte, byte secondarySeparatorByte, byte mapSeparatorByte, byte rowSeparatorByte,
+ public TCTLSeparatedProtocol(TTransport trans, String primarySeparator, String secondarySeparator, String mapSeparator, String rowSeparator,
boolean returnNulls,
int bufferSize) {
super(trans);
@@ -266,34 +290,103 @@
returnNulls = returnNulls;
- this.primarySeparatorByte = primarySeparatorByte;
- this.secondarySeparatorByte = secondarySeparatorByte;
- this.rowSeparatorByte = rowSeparatorByte;
- this.mapSeparatorByte = mapSeparatorByte;
+ this.primarySeparator = primarySeparator;
+ this.secondarySeparator = secondarySeparator;
+ this.rowSeparator = rowSeparator;
+ this.mapSeparator = mapSeparator;
this.innerTransport = trans;
this.bufferSize = bufferSize;
+ this.nullString = "\\N";
internalInitialize();
}
+
/**
* Sets the internal separator patterns and creates the internal tokenizer.
*/
protected void internalInitialize() {
- byte []primarySeparator = new byte[1];
- byte []secondarySeparator = new byte[1];
- primarySeparator[0] = primarySeparatorByte;
- secondarySeparator[0] = secondarySeparatorByte;
-
- primaryPattern = Pattern.compile(new String(primarySeparator));
- secondaryPattern = Pattern.compile(new String(secondarySeparator));
- mapPattern = Pattern.compile("\\0" + secondarySeparatorByte + "|\\0" + mapSeparatorByte);
- transportTokenizer = new SimpleTransportTokenizer(innerTransport, rowSeparatorByte, bufferSize);
+ // in the future could allow users to specify a quote character that doesn't need escaping but for now ...
+ final String primaryPatternString =
+ quote == null ? primarySeparator :
+ "(?:^|" + primarySeparator + ")(" + quote + "(?:[^" + quote + "]+|" + quote + quote + ")*" + quote + "|[^" + primarySeparator + "]*)";
+
+ if (quote != null) {
+ stripSeparatorPrefix = Pattern.compile("^" + primarySeparator);
+ stripQuotePrefix = Pattern.compile("^" + quote);
+ stripQuotePostfix = Pattern.compile(quote + "$");
+ }
+
+ primaryPattern = Pattern.compile(primaryPatternString);
+ secondaryPattern = Pattern.compile(secondarySeparator);
+ mapPattern = Pattern.compile(secondarySeparator + "|" + mapSeparator);
+ nullBuf = nullString.getBytes();
+ transportTokenizer = new SimpleTransportTokenizer(innerTransport, rowSeparator, bufferSize);
+ }
+
+ /**
+ * Patterns used by complexSplit for quoted fields: they detect the leading separator and the surrounding quotes
+ * so that these can be stripped from each matched field.
+ */
+ protected Pattern stripSeparatorPrefix;
+ protected Pattern stripQuotePrefix;
+ protected Pattern stripQuotePostfix;
+
+
+ /**
+ *
+ * Split the line based on a complex regex pattern
+ *
+ * @param line the current row
+ * @param p the pattern for matching fields in the row
+ * @return array of the matched fields, with the leading separator and surrounding quotes stripped
+ */
+ protected String[] complexSplit(String line, Pattern p) {
+
+ ArrayList<String> list = new ArrayList<String>();
+ Matcher m = p.matcher(line);
+ // For each field
+ while (m.find()) {
+ String match = m.group();
+ if (match == null)
+ break;
+ if (match.length() == 0)
+ match = null;
+ else {
+ if(stripSeparatorPrefix.matcher(match).find()) {
+ match = match.substring(1);
+ }
+ if(stripQuotePrefix.matcher(match).find()) {
+ match = match.substring(1);
+ }
+ if(stripQuotePostfix.matcher(match).find()) {
+ match = match.substring(0,match.length() - 1);
+ }
+ }
+ list.add(match);
+ }
+ return (String[])list.toArray(new String[1]);
}
+
+
+ protected String getByteValue(String altValue, String defaultVal) {
+ if (altValue != null && altValue.length() > 0) {
+ try {
+ byte b [] = new byte[1];
+ b[0] = Byte.valueOf(altValue).byteValue();
+ return new String(b);
+ } catch(NumberFormatException e) {
+ return altValue;
+ }
+ }
+ return defaultVal;
+ }
+
+
/**
* Initialize the TProtocol
* @param conf System properties
@@ -301,13 +394,16 @@
* @throws TException
*/
public void initialize(Configuration conf, Properties tbl) throws TException {
- primarySeparatorByte = Byte.valueOf(tbl.getProperty(Constants.FIELD_DELIM, String.valueOf(primarySeparatorByte))).byteValue();
- LOG.debug("collections delim=<" + tbl.getProperty(Constants.COLLECTION_DELIM) + ">" );
- secondarySeparatorByte = Byte.valueOf(tbl.getProperty(Constants.COLLECTION_DELIM, String.valueOf(secondarySeparatorByte))).byteValue();
- rowSeparatorByte = Byte.valueOf(tbl.getProperty(Constants.LINE_DELIM, String.valueOf(rowSeparatorByte))).byteValue();
- mapSeparatorByte = Byte.valueOf(tbl.getProperty(Constants.MAPKEY_DELIM, String.valueOf(mapSeparatorByte))).byteValue();
+
+
+ primarySeparator = getByteValue(tbl.getProperty(Constants.FIELD_DELIM), primarySeparator);
+ secondarySeparator = getByteValue(tbl.getProperty(Constants.COLLECTION_DELIM), secondarySeparator);
+ rowSeparator = getByteValue(tbl.getProperty(Constants.LINE_DELIM), rowSeparator);
+ mapSeparator = getByteValue(tbl.getProperty(Constants.MAPKEY_DELIM), mapSeparator);
returnNulls = Boolean.valueOf(tbl.getProperty(ReturnNullsKey, String.valueOf(returnNulls))).booleanValue();
bufferSize = Integer.valueOf(tbl.getProperty(BufferSizeKey, String.valueOf(bufferSize))).intValue();
+ nullString = tbl.getProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N");
+ quote = tbl.getProperty(Constants.QUOTE_CHAR, null);
internalInitialize();
@@ -329,7 +425,7 @@
public void writeFieldBegin(TField field) throws TException {
if(! firstField) {
- writeByte(primarySeparatorByte);
+ internalWriteString(primarySeparator);
}
firstField = false;
}
@@ -424,21 +520,34 @@
writeString(String.valueOf(dub));
}
+ public void internalWriteString(String str) throws TException {
+ if(str != null) {
+ final byte buf[] = str.getBytes();
+ trans_.write(buf, 0, buf.length);
+ } else {
+ trans_.write(nullBuf, 0, nullBuf.length);
+ }
+ }
+
public void writeString(String str) throws TException {
if(inner) {
if(!firstInnerField) {
// super hack city notice the mod plus only happens after firstfield hit, so == 0 is right.
if(isMap && elemIndex++ % 2 == 0) {
- writeByte(mapSeparatorByte);
+ internalWriteString(mapSeparator);
} else {
- writeByte(secondarySeparatorByte);
+ internalWriteString(secondarySeparator);
}
} else {
firstInnerField = false;
}
}
- final byte buf[] = str.getBytes();
- trans_.write(buf, 0, buf.length);
+ if(str != null) {
+ final byte buf[] = str.getBytes();
+ trans_.write(buf, 0, buf.length);
+ } else {
+ trans_.write(nullBuf, 0, nullBuf.length);
+ }
}
public void writeBinary(byte[] bin) throws TException {
@@ -456,7 +565,7 @@
assert(!inner);
try {
final String tmp = transportTokenizer.nextToken();
- columns = primaryPattern.split(tmp);
+ columns = quote == null ? primaryPattern.split(tmp) : complexSplit(tmp, primaryPattern);
index = 0;
return new TStruct();
} catch(EOFException e) {
@@ -468,6 +577,20 @@
columns = null;
}
+
+ /**
+ * Skip past the current field
+ * Just increments the field index counter.
+ */
+ public void skip(byte type) {
+ if( inner) {
+ innerIndex++;
+ } else {
+ index++;
+ }
+ }
+
+
public TField readFieldBegin() throws TException {
assert( !inner);
TField f = new TField();
@@ -483,11 +606,19 @@
public TMap readMapBegin() throws TException {
assert( !inner);
TMap map = new TMap();
- fields = mapPattern.split(columns[index++]);
- if(fields != null) {
- map.size = fields.length/2;
- } else {
+ if(columns[index] == null ||
+ columns[index].equals(nullString)) {
+ index++;
+ if(returnNulls) {
+ return null;
+ }
+ map.size = 0;
+ } else if(columns[index].isEmpty()) {
map.size = 0;
+ index++;
+ } else {
+ fields = mapPattern.split(columns[index++]);
+ map.size = fields.length/2;
}
innerIndex = 0;
inner = true;
@@ -503,11 +634,19 @@
public TList readListBegin() throws TException {
assert( !inner);
TList list = new TList();
- fields = secondaryPattern.split(columns[index++]);
- if(fields != null) {
- list.size = fields.length ;
- } else {
+ if(columns[index] == null ||
+ columns[index].equals(nullString)) {
+ index++;
+ if(returnNulls) {
+ return null;
+ }
+ list.size = 0;
+ } else if(columns[index].isEmpty()) {
list.size = 0;
+ index++;
+ } else {
+ fields = secondaryPattern.split(columns[index++]);
+ list.size = fields.length ;
}
innerIndex = 0;
inner = true;
@@ -521,53 +660,88 @@
public TSet readSetBegin() throws TException {
assert( !inner);
TSet set = new TSet();
- fields = secondaryPattern.split(columns[index++]);
- if(fields != null) {
- set.size = fields.length ;
- } else {
+ if(columns[index] == null ||
+ columns[index].equals(nullString)) {
+ index++;
+ if(returnNulls) {
+ return null;
+ }
set.size = 0;
+ } else if(columns[index].isEmpty()) {
+ set.size = 0;
+ index++;
+ } else {
+ fields = secondaryPattern.split(columns[index++]);
+ set.size = fields.length ;
}
inner = true;
innerIndex = 0;
return set;
}
+ protected boolean lastPrimitiveWasNullFlag;
+
+ public boolean lastPrimitiveWasNull() throws TException {
+ return lastPrimitiveWasNullFlag;
+ }
+
+ public void writeNull() throws TException {
+ writeString(null);
+ }
+
public void readSetEnd() throws TException {
inner = false;
}
+
public boolean readBool() throws TException {
- return Boolean.valueOf(readString()).booleanValue();
+ String val = readString();
+ lastPrimitiveWasNullFlag = val == null;
+ return val == null || val.isEmpty() ? false : Boolean.valueOf(val).booleanValue();
}
public byte readByte() throws TException {
- return Byte.valueOf(readString()).byteValue();
+ String val = readString();
+ lastPrimitiveWasNullFlag = val == null;
+ return val == null || val.isEmpty() ? 0 : Byte.valueOf(val).byteValue();
}
public short readI16() throws TException {
- return Short.valueOf(readString()).shortValue();
+ String val = readString();
+ lastPrimitiveWasNullFlag = val == null;
+ return val == null || val.isEmpty() ? 0 : Short.valueOf(val).shortValue();
}
public int readI32() throws TException {
- return Integer.valueOf(readString()).intValue();
+ String val = readString();
+ lastPrimitiveWasNullFlag = val == null;
+ return val == null || val.isEmpty() ? 0 : Integer.valueOf(val).intValue();
}
public long readI64() throws TException {
- return Long.valueOf(readString()).longValue();
+ String val = readString();
+ lastPrimitiveWasNullFlag = val == null;
+ return val == null || val.isEmpty() ? 0 : Long.valueOf(val).longValue();
}
public double readDouble() throws TException {
- return Double.valueOf(readString()).doubleValue();
+ String val = readString();
+ lastPrimitiveWasNullFlag = val == null;
+ return val == null || val.isEmpty() ? 0 :Double.valueOf(val).doubleValue();
}
- protected String [] curMapPair;
public String readString() throws TException {
String ret;
if(!inner) {
- ret = columns != null && index < columns.length ? columns[index++] : null;
+ ret = columns != null && index < columns.length ? columns[index] : null;
+ index++;
} else {
- ret = fields != null && innerIndex < fields.length ? fields[innerIndex++] : null;
+ ret = fields != null && innerIndex < fields.length ? fields[innerIndex] : null;
+ innerIndex++;
}
- return ret == null && ! returnNulls ? "" : ret;
+ if(ret == null || ret.equals(nullString))
+ return returnNulls ? null : "";
+ else
+ return ret;
}
public byte[] readBinary() throws TException {
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java?rev=712905&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/WriteNullsProtocol.java Mon Nov 10 17:50:06 2008
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.thrift;
+
+import com.facebook.thrift.TException;
+
+/**
+ * An interface for TProtocols that actually write out nulls -
+ * This should be for all those that don't actually use
+ * fieldids in the written data like TCTLSeparatedProtocol.
+ *
+ */
+public interface WriteNullsProtocol {
+ /**
+ * Was the last primitive read really a NULL? Need
+ * only be called when the value of the primitive
+ * was 0, i.e. the protocol should return 0 on nulls
+ * and the caller will then check if it was actually null.
+ * For boolean, the value returned on null is false.
+ */
+ public boolean lastPrimitiveWasNull() throws TException;
+
+ /**
+ * Write a null
+ */
+ public void writeNull() throws TException;
+
+}
Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java?rev=712905&r1=712904&r2=712905&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java Mon Nov 10 17:50:06 2008
@@ -18,6 +18,9 @@
package org.apache.hadoop.hive.serde2;
+
+import org.apache.hadoop.hive.serde.Constants;
+
import junit.framework.TestCase;
import java.io.*;
import org.apache.hadoop.hive.serde2.*;
@@ -27,6 +30,7 @@
import com.facebook.thrift.transport.*;
import com.facebook.thrift.*;
import com.facebook.thrift.protocol.*;
+import org.apache.hadoop.conf.Configuration;
public class TestTCTLSeparatedProtocol extends TestCase {
@@ -71,7 +75,7 @@
// use 3 as the row buffer size to force lots of re-buffering.
- TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 3);
+ TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 1024);
prot.readStructBegin();
@@ -116,7 +120,7 @@
public void testWrites() throws Exception {
try {
TMemoryBuffer trans = new TMemoryBuffer(1024);
- TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 3);
+ TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 1024);
prot.writeStructBegin(new TStruct());
prot.writeFieldBegin(new TField());
@@ -164,8 +168,6 @@
String test = new String(b, 0, len);
String testRef = "100348.55234.22hello world!key1val1key2val2key3val3elem1elem2bye!";
- // System.err.println("test=" + test + ">");
- // System.err.println(" ref=" + testRef + ">");
assertTrue(test.equals(testRef));
trans = new TMemoryBuffer(1023);
@@ -242,4 +244,263 @@
}
}
+ public void testQuotedWrites() throws Exception {
+ try {
+ TMemoryBuffer trans = new TMemoryBuffer(4096);
+ TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 4096);
+ Properties schema = new Properties();
+ schema.setProperty(Constants.QUOTE_CHAR, "\"");
+ schema.setProperty(Constants.FIELD_DELIM, ",");
+ prot.initialize(new Configuration(), schema);
+
+ String testStr = "\"hello, world!\"";
+
+ prot.writeStructBegin(new TStruct());
+
+ prot.writeFieldBegin(new TField());
+ prot.writeString(testStr);
+ prot.writeFieldEnd();
+
+ prot.writeFieldBegin(new TField());
+ prot.writeListBegin(new TList());
+ prot.writeString("elem1");
+ prot.writeString("elem2");
+ prot.writeListEnd();
+ prot.writeFieldEnd();
+
+ prot.writeStructEnd();
+ prot.writeString("\n");
+
+ trans.flush();
+
+ byte b[] = new byte[4096];
+ int len = trans.read(b,0,b.length);
+
+
+ trans = new TMemoryBuffer(4096);
+ trans.write(b,0,len);
+ prot = new TCTLSeparatedProtocol(trans, 1024);
+ prot.initialize(new Configuration(), schema);
+
+ prot.readStructBegin();
+ prot.readFieldBegin();
+ final String firstRead = prot.readString();
+ prot.readFieldEnd();
+
+ testStr = testStr.replace("\"","");
+
+ assertEquals(testStr, firstRead);
+
+
+ // the 2 element list
+ prot.readFieldBegin();
+ TList l = prot.readListBegin();
+ assertTrue(l.size == 2);
+ assertTrue(prot.readString().equals("elem1"));
+ assertTrue(prot.readString().equals("elem2"));
+ prot.readListEnd();
+ prot.readFieldEnd();
+
+ // should return an empty string when reading past the last field
+ prot.readFieldBegin();
+ assertTrue(prot.readString().equals(""));
+ prot.readFieldEnd();
+
+ // should return an empty string when reading past the last field
+ prot.readFieldBegin();
+ assertTrue(prot.readString().equals(""));
+ prot.readFieldEnd();
+
+ prot.readStructEnd();
+
+
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+
+ /**
+ * Tests a sample apache log format. This is actually better done in general with a more TRegexLike protocol, but for this
+ * case, TCTLSeparatedProtocol can do it.
+ */
+ public void test1ApacheLogFormat() throws Exception {
+ try {
+ final String sample = "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326";
+
+ TMemoryBuffer trans = new TMemoryBuffer(4096);
+ trans.write(sample.getBytes(), 0, sample.getBytes().length);
+ trans.flush();
+
+ TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 4096);
+ Properties schema = new Properties();
+
+ // this is a hacky way of doing the quotes since it will match any 2 of these, so
+ // "[ hello this is something to split [" would be considered to be quoted.
+ schema.setProperty(Constants.QUOTE_CHAR, "(\"|\\[|\\])");
+
+ schema.setProperty(Constants.FIELD_DELIM, " ");
+ schema.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "-");
+ prot.initialize(new Configuration(), schema);
+
+ prot.readStructBegin();
+
+ // ip address
+ prot.readFieldBegin();
+ final String ip = prot.readString();
+ prot.readFieldEnd();
+
+ assertEquals("127.0.0.1", ip);
+
+ // identd
+ prot.readFieldBegin();
+ final String identd = prot.readString();
+ prot.readFieldEnd();
+
+ assertEquals("", identd);
+
+ // user
+ prot.readFieldBegin();
+ final String user = prot.readString();
+ prot.readFieldEnd();
+
+ assertEquals("frank",user);
+
+ // finishTime
+ prot.readFieldBegin();
+ final String finishTime = prot.readString();
+ prot.readFieldEnd();
+
+ assertEquals("10/Oct/2000:13:55:36 -0700",finishTime);
+
+ // requestLine
+ prot.readFieldBegin();
+ final String requestLine = prot.readString();
+ prot.readFieldEnd();
+
+ assertEquals("GET /apache_pb.gif HTTP/1.0",requestLine);
+
+ // returncode
+ prot.readFieldBegin();
+ final int returnCode = prot.readI32();
+ prot.readFieldEnd();
+
+ assertEquals(200, returnCode);
+
+ // return size
+ prot.readFieldBegin();
+ final int returnSize = prot.readI32();
+ prot.readFieldEnd();
+
+ assertEquals(2326, returnSize);
+
+ prot.readStructEnd();
+
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+
+
+ public void testNulls() throws Exception {
+ try {
+ TMemoryBuffer trans = new TMemoryBuffer(1024);
+ TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 10);
+
+ prot.writeStructBegin(new TStruct());
+
+ prot.writeFieldBegin(new TField());
+ prot.writeString(null);
+ prot.writeFieldEnd();
+
+ prot.writeFieldBegin(new TField());
+ prot.writeString(null);
+ prot.writeFieldEnd();
+
+ prot.writeFieldBegin(new TField());
+ prot.writeI32(100);
+ prot.writeFieldEnd();
+
+ prot.writeFieldBegin(new TField());
+ prot.writeString(null);
+ prot.writeFieldEnd();
+
+ prot.writeFieldBegin(new TField());
+ prot.writeMapBegin(new TMap());
+ prot.writeString(null);
+ prot.writeString(null);
+ prot.writeString("key2");
+ prot.writeString(null);
+ prot.writeString(null);
+ prot.writeString("val3");
+ prot.writeMapEnd();
+ prot.writeFieldEnd();
+
+ prot.writeStructEnd();
+
+ byte b[] = new byte[3*1024];
+ int len = trans.read(b,0,b.length);
+ String written = new String(b,0,len);
+
+ String testRef = "\\N\\N100\\N\\N\\Nkey2\\N\\Nval3";
+
+ assertTrue(testRef.equals(written));
+
+ trans = new TMemoryBuffer(1023);
+ trans.write(b, 0, len);
+
+ prot = new TCTLSeparatedProtocol(trans, 3);
+
+ prot.readStructBegin();
+
+ prot.readFieldBegin();
+ String ret = prot.readString();
+ prot.readFieldEnd();
+
+ assertTrue(ret.equals(""));
+
+ prot.readFieldBegin();
+ ret = prot.readString();
+ prot.readFieldEnd();
+
+ assertTrue(ret.equals(""));
+
+ prot.readFieldBegin();
+ int ret1 = prot.readI32();
+ prot.readFieldEnd();
+
+ assertTrue(ret1 == 100);
+
+
+ prot.readFieldBegin();
+ ret1 = prot.readI32();
+ prot.readFieldEnd();
+
+ prot.readFieldBegin();
+ TMap map = prot.readMapBegin();
+
+ assertTrue(map.size == 3);
+
+ assertTrue(prot.readString().isEmpty());
+ assertTrue(prot.readString().isEmpty());
+
+ assertTrue(prot.readString().equals("key2"));
+ assertTrue(prot.readString().isEmpty());
+
+ assertTrue(prot.readString().isEmpty());
+ assertTrue(prot.readString().equals("val3"));
+
+ prot.readMapEnd();
+ prot.readFieldEnd();
+
+ assertTrue(ret1 == 0);
+
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+
+
}
Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java?rev=712905&r1=712904&r2=712905&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java Mon Nov 10 17:50:06 2008
@@ -20,8 +20,11 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.Map;
import java.util.List;
import java.util.Properties;
+import java.util.Random;
+import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.Constants;
@@ -35,6 +38,13 @@
public class TestDynamicSerDe extends TestCase {
+ public static HashMap<String,String> makeHashMap(String... params) {
+ HashMap<String,String> r = new HashMap<String,String>();
+ for(int i=0; i<params.length; i+=2) {
+ r.put(params[i], params[i+1]);
+ }
+ return r;
+ }
public void testDynamicSerDe() throws Throwable {
try {
@@ -49,24 +59,41 @@
struct.add(Integer.valueOf(234));
struct.add(bye);
struct.add(another);
+ struct.add(Integer.valueOf(-234));
+ struct.add(Double.valueOf(1.0));
+ struct.add(Double.valueOf(-2.5));
+
// All protocols
ArrayList<String> protocols = new ArrayList<String>();
ArrayList<Boolean> isBinaries = new ArrayList<Boolean>();
-
+ ArrayList<HashMap<String,String>> additionalParams = new ArrayList<HashMap<String,String>>();
+
+ protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
+ isBinaries.add(true);
+ additionalParams.add(makeHashMap("serialization.sort.order", "++++++"));
+ protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
+ isBinaries.add(true);
+ additionalParams.add(makeHashMap("serialization.sort.order", "------"));
+
+
protocols.add(com.facebook.thrift.protocol.TBinaryProtocol.class.getName());
isBinaries.add(true);
+ additionalParams.add(null);
protocols.add(com.facebook.thrift.protocol.TJSONProtocol.class.getName());
isBinaries.add(false);
+ additionalParams.add(null);
// TSimpleJSONProtocol does not support deserialization.
// protocols.add(com.facebook.thrift.protocol.TSimpleJSONProtocol.class.getName());
// isBinaries.add(false);
+ // additionalParams.add(null);
// TCTLSeparatedProtocol is not done yet.
protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
isBinaries.add(false);
+ additionalParams.add(null);
System.out.println("input struct = " + struct);
@@ -80,8 +107,14 @@
schema.setProperty(Constants.SERIALIZATION_FORMAT, protocol);
schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
schema.setProperty(Constants.SERIALIZATION_DDL,
- "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+ "struct test { i32 hello, list<string> bye, map<string,i32> another, i32 nhello, double d, double nd}");
schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+ HashMap<String, String> p = additionalParams.get(pp);
+ if (p != null) {
+ for(Entry<String, String> e: p.entrySet()) {
+ schema.setProperty(e.getKey(), e.getValue());
+ }
+ }
DynamicSerDe serde = new DynamicSerDe();
serde.initialize(new Configuration(), schema);
@@ -93,15 +126,8 @@
// Try to serialize
BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
-
- StringBuilder sb = new StringBuilder();
- for (int i=0; i<bytes.getSize(); i++) {
- byte b = bytes.get()[i];
- int v = (b<0 ? 256 + b : b);
- sb.append(String.format("x%02x", v));
- }
- System.out.println("bytes =" + sb);
-
+ System.out.println("bytes =" + hexString(bytes));
+
if (!isBinary) {
System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()));
}
@@ -116,7 +142,7 @@
System.out.println("o[2] class = " + olist.get(2).getClass());
System.out.println("o = " + o);
- assertEquals(o, struct);
+ assertEquals(struct, o);
}
} catch (Throwable e) {
@@ -124,9 +150,216 @@
throw e;
}
+ }
+ /**
+  * Renders the first getSize() bytes of a BytesWritable as text, writing
+  * each byte as an "x" followed by two lowercase hex digits.
+  *
+  * @param bytes the buffer to dump
+  * @return the concatenated "xNN" groups; empty when getSize() is 0
+  */
+ public String hexString(BytesWritable bytes) {
+   final int length = bytes.getSize();
+   final byte[] raw = bytes.get();
+   StringBuilder hex = new StringBuilder();
+   for (int pos = 0; pos < length; pos++) {
+     // Mask the signed byte to its unsigned value (0..255) before formatting.
+     int unsigned = raw[pos] & 0xff;
+     hex.append(String.format("x%02x", unsigned));
+   }
+   return hex.toString();
+ }
+
+
+ /**
+  * Round-trips every struct through a DynamicSerDe configured with
+  * TBinarySortableProtocol. While serializing, each row's bytes are compared
+  * with the previous row's bytes and the test fails if their order
+  * contradicts the requested direction; afterwards every row is
+  * deserialized and must equal its source struct.
+  *
+  * @param structs rows to check; callers pass them sorted in ascending order
+  * @param ddl thrift DDL for the struct being serialized
+  * @param ascending true to request "+" for every field in the sort order,
+  *        false to request "-"
+  * @throws Throwable whatever the serde or a failed assertion throws
+  */
+ private void testTBinarySortableProtocol(Object[] structs, String ddl, boolean ascending) throws Throwable{
+   // One order flag per field; the field count is read from the last struct.
+   int fieldCount = ((List)structs[structs.length-1]).size();
+   String flag = ascending ? "+" : "-";
+   String order = "";
+   for (int f = 0; f < fieldCount; f++) {
+     order += flag;
+   }
+
+   Properties schema = new Properties();
+   schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
+   schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+   schema.setProperty(Constants.SERIALIZATION_DDL, ddl);
+   schema.setProperty(Constants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
+   schema.setProperty(Constants.SERIALIZATION_SORT_ORDER, order);
+
+   DynamicSerDe serde = new DynamicSerDe();
+   serde.initialize(new Configuration(), schema);
+   ObjectInspector oi = serde.getObjectInspector();
+
+   // Serialize row by row, checking each row against its predecessor.
+   String direction = ascending ? "ascending" : "descending";
+   BytesWritable[] serialized = new BytesWritable[structs.length];
+   for (int row = 0; row < structs.length; row++) {
+     // Copy the serde output into a BytesWritable owned by this row.
+     BytesWritable copy = new BytesWritable();
+     copy.set((BytesWritable)serde.serialize(structs[row], oi));
+     serialized[row] = copy;
+     if (row == 0) {
+       continue;
+     }
+     int cmp = serialized[row-1].compareTo(serialized[row]);
+     // Equal bytes are accepted; only a strictly opposite order is a failure.
+     boolean reversed = ascending ? cmp > 0 : cmp < 0;
+     if (reversed) {
+       System.out.println("Test failed in " + direction + " order.");
+       System.out.println("serialized data of " + structs[row-1] + " = " + hexString(serialized[row-1]));
+       System.out.println("serialized data of " + structs[row] + " = " + hexString(serialized[row]));
+       fail("Sort order of serialized " + structs[row-1] + " and " + structs[row] + " are reversed!");
+     }
+   }
+
+   // Deserialize every row and compare it with the struct it came from.
+   for (int row = 0; row < structs.length; row++) {
+     Object restored = serde.deserialize(serialized[row]);
+     if (structs[row].equals(restored)) {
+       continue;
+     }
+     System.out.println("structs[i] = " + structs[row]);
+     System.out.println("deserialized[i] = " + restored);
+     System.out.println("serialized[i] = " + hexString(serialized[row]));
+     assertEquals(structs[row], restored);
+   }
+ }
+
+ /**
+  * Reference ordering used to sort test data before it is serialized.
+  * null sorts before any non-null value, lists are compared element by
+  * element, numbers numerically and strings with String.compareTo. Any
+  * other type compares as equal.
+  *
+  * Integral boxes (Long, Integer, Short, Byte) are compared as longs so
+  * that distinct values above 2^53 are not collapsed by a conversion to
+  * double; every other pair of numbers is compared as doubles.
+  *
+  * @param a left value, may be null
+  * @param b right value, may be null; expected to be of the same kind as a
+  * @return a negative number, zero or a positive number when a is
+  *         smaller than, equal to or larger than b
+  * @throws ClassCastException if a is a List, Number or String and b is a
+  *         non-null value of a different kind
+  */
+ static int compare(Object a, Object b) {
+   if (a == null && b == null) return 0;
+   if (a == null) return -1;
+   if (b == null) return 1;
+   if (a instanceof List) {
+     List<?> la = (List<?>)a;
+     List<?> lb = (List<?>)b;
+     assert(la.size() == lb.size());
+     // The first differing element decides the order.
+     for (int i=0; i<la.size(); i++) {
+       int r = compare(la.get(i), lb.get(i));
+       if (r != 0) return r;
+     }
+     return 0;
+   } else if (a instanceof Number) {
+     Number na = (Number) a;
+     Number nb = (Number) b;
+     if (isIntegral(na) && isIntegral(nb)) {
+       // Exact comparison: doubleValue() would round large longs.
+       long x = na.longValue();
+       long y = nb.longValue();
+       if (x < y) return -1;
+       if (x > y) return 1;
+       return 0;
+     }
+     if (na.doubleValue() < nb.doubleValue()) return -1;
+     if (na.doubleValue() > nb.doubleValue()) return 1;
+     return 0;
+   } else if (a instanceof String) {
+     String sa = (String) a;
+     String sb = (String) b;
+     return sa.compareTo(sb);
+   }
+   return 0;
+ }
+
+ /** Returns true for the boxed integer types whose value fits a long exactly. */
+ private static boolean isIntegral(Number n) {
+   return n instanceof Long || n instanceof Integer
+       || n instanceof Short || n instanceof Byte;
+ }
+
+ /**
+  * Sorts the array in place into ascending order as defined by
+  * compare(): each position is compared with every later position and the
+  * two entries are swapped whenever the earlier one is larger.
+  *
+  * @param structs the array to reorder
+  */
+ private void sort(Object[] structs) {
+   final int count = structs.length;
+   for (int lo = 0; lo < count; lo++) {
+     for (int hi = lo + 1; hi < count; hi++) {
+       if (compare(structs[lo], structs[hi]) <= 0) {
+         continue;
+       }
+       // Out of order: move the smaller entry to the earlier slot.
+       Object larger = structs[lo];
+       structs[lo] = structs[hi];
+       structs[hi] = larger;
+     }
+   }
}
+ /**
+  * Checks TBinarySortableProtocol on 100-row data sets of doubles, i32s,
+  * i64s, strings and (string, double) pairs. Each data set is sorted with
+  * sort() and then handed to the private helper twice, once with
+  * ascending and once with descending sort order. The Random seed is
+  * fixed, so the generated data is the same on every run.
+  */
+ public void testTBinarySortableProtocol() throws Throwable {
+ try {
+ System.out.println("Beginning Test testTBinarySortableProtocol:");
+
+ int num = 100;
+ Random r = new Random(1234);
+ Object structs[] = new Object[num];
+ String ddl;
+
+ // Test double: one null row plus random values in [-5, 5)
+ for (int i=0; i<num; i++) {
+ ArrayList<Object> struct = new ArrayList<Object>();
+ if (i==0) {
+ struct.add(null);
+ } else {
+ struct.add(Double.valueOf((r.nextDouble()-0.5)*10));
+ }
+ structs[i] = struct;
+ }
+ sort(structs);
+ ddl = "struct test { double hello}";
+ System.out.println("Testing " + ddl);
+ testTBinarySortableProtocol(structs, ddl, true);
+ testTBinarySortableProtocol(structs, ddl, false);
+
+ // Test integer: one null row plus random values within +/- 0.75 * Integer.MAX_VALUE
+ for (int i=0; i<num; i++) {
+ ArrayList<Object> struct = new ArrayList<Object>();
+ if (i==0) {
+ struct.add(null);
+ } else {
+ struct.add((int)((r.nextDouble()-0.5)*1.5*Integer.MAX_VALUE));
+ }
+ structs[i] = struct;
+ }
+ sort(structs);
+ // Null should be smaller than any other value, so put a null at the front end
+ // to test whether that is held.
+ ((List)structs[0]).set(0, null);
+ ddl = "struct test { i32 hello}";
+ System.out.println("Testing " + ddl);
+ testTBinarySortableProtocol(structs, ddl, true);
+ testTBinarySortableProtocol(structs, ddl, false);
+
+ // Test long: one null row plus random values within +/- 0.75 * Long.MAX_VALUE
+ for (int i=0; i<num; i++) {
+ ArrayList<Object> struct = new ArrayList<Object>();
+ if (i==0) {
+ struct.add(null);
+ } else {
+ struct.add((long)((r.nextDouble()-0.5)*1.5*Long.MAX_VALUE));
+ }
+ structs[i] = struct;
+ }
+ sort(structs);
+ // Null should be smaller than any other value, so put a null at the front end
+ // to test whether that is held.
+ ((List)structs[0]).set(0, null);
+ ddl = "struct test { i64 hello}";
+ System.out.println("Testing " + ddl);
+ testTBinarySortableProtocol(structs, ddl, true);
+ testTBinarySortableProtocol(structs, ddl, false);
+
+ // Test string: one null row plus the decimal text of random doubles in [-500, 500)
+ for (int i=0; i<num; i++) {
+ ArrayList<Object> struct = new ArrayList<Object>();
+ if (i==0) {
+ struct.add(null);
+ } else {
+ struct.add(String.valueOf((r.nextDouble()-0.5)*1000));
+ }
+ structs[i] = struct;
+ }
+ sort(structs);
+ // Null should be smaller than any other value, so put a null at the front end
+ // to test whether that is held.
+ ((List)structs[0]).set(0, null);
+ ddl = "struct test { string hello}";
+ System.out.println("Testing " + ddl);
+ testTBinarySortableProtocol(structs, ddl, true);
+ testTBinarySortableProtocol(structs, ddl, false);
+
+ // Test string + double: every 9th row has a null string, every 7th row a
+ // null double, and "str" + (i/5) gives runs of rows sharing the same string.
+ for (int i=0; i<num; i++) {
+ ArrayList<Object> struct = new ArrayList<Object>();
+ if (i%9==0) {
+ struct.add(null);
+ } else {
+ struct.add("str" + (i/5));
+ }
+ if (i%7==0) {
+ struct.add(null);
+ } else {
+ struct.add(Double.valueOf((r.nextDouble()-0.5)*10));
+ }
+ structs[i] = struct;
+ }
+ sort(structs);
+ // Null should be smaller than any other value, so put a null at the front end
+ // to test whether that is held.
+ ((List)structs[0]).set(0, null);
+ ddl = "struct test { string hello, double another}";
+ System.out.println("Testing " + ddl);
+ testTBinarySortableProtocol(structs, ddl, true);
+ testTBinarySortableProtocol(structs, ddl, false);
+
+ System.out.println("Test testTBinarySortableProtocol passed!");
+ } catch (Throwable e) {
+ // Print the stack trace before rethrowing.
+ e.printStackTrace();
+ throw e;
+ }
+ }
public void testConfigurableTCTLSeparated() throws Throwable {
@@ -161,20 +394,14 @@
serde.initialize(new Configuration(), schema);
TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol)serde.oprot_;
- assertTrue(prot.getPrimarySeparator() == 9);
+ assertTrue(prot.getPrimarySeparator().equals("\u0009"));
ObjectInspector oi = serde.getObjectInspector();
// Try to serialize
BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
- StringBuilder sb = new StringBuilder();
- for (int i=0; i<bytes.getSize(); i++) {
- byte b = bytes.get()[i];
- int v = (b<0 ? 256 + b : b);
- sb.append(String.format("x%02x", v));
- }
- System.out.println("bytes =" + sb);
+ hexString(bytes);
String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
@@ -201,4 +428,356 @@
}
}
+
+
+ /**
+  * Tests a struct whose list field is null, with return nulls on: the
+  * struct must come back equal to the original after a
+  * serialize/deserialize round trip through TCTLSeparatedProtocol.
+  */
+ public void testNulls1() throws Throwable {
+   try {
+     // Build the struct: i32 = 234, list = null, map = {firstKey=1, secondKey=2}.
+     ArrayList<String> bye = null;
+     HashMap<String, Integer> another = new HashMap<String, Integer>();
+     another.put("firstKey", 1);
+     another.put("secondKey", 2);
+     ArrayList<Object> struct = new ArrayList<Object>();
+     struct.add(Integer.valueOf(234));
+     struct.add(bye);
+     struct.add(another);
+
+     Properties schema = new Properties();
+     schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+     schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+     schema.setProperty(Constants.SERIALIZATION_DDL,
+       "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+     // Plain class name; Class.toString() would prepend "class ".
+     schema.setProperty(Constants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
+     schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
+
+     DynamicSerDe serde = new DynamicSerDe();
+     serde.initialize(new Configuration(), schema);
+
+     ObjectInspector oi = serde.getObjectInspector();
+
+     // Try to serialize
+     BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+     // Print the dump; calling hexString() alone would discard it.
+     System.out.println("bytes =" + hexString(bytes));
+
+     // Try to deserialize
+     Object o = serde.deserialize(bytes);
+     assertEquals(struct, o);
+
+   } catch (Throwable e) {
+     e.printStackTrace();
+     throw e;
+   }
+
+ }
+
+ /**
+  * Tests all three fields of a struct being null with return nulls on:
+  * the deserialized struct must have three elements, all of them null.
+  */
+ public void testNulls2() throws Throwable {
+   try {
+     // Build the struct: i32 = null, list = null, map = null.
+     ArrayList<String> bye = null;
+     HashMap<String, Integer> another = null;
+     ArrayList<Object> struct = new ArrayList<Object>();
+     struct.add(null);
+     struct.add(bye);
+     struct.add(another);
+
+     Properties schema = new Properties();
+     schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+     schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+     schema.setProperty(Constants.SERIALIZATION_DDL,
+       "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+     // Plain class name; Class.toString() would prepend "class ".
+     schema.setProperty(Constants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
+     schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
+
+     DynamicSerDe serde = new DynamicSerDe();
+     serde.initialize(new Configuration(), schema);
+
+     ObjectInspector oi = serde.getObjectInspector();
+
+     // Try to serialize
+     BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+     // Print the dump; calling hexString() alone would discard it.
+     System.out.println("bytes =" + hexString(bytes));
+
+     // Try to deserialize
+     Object o = serde.deserialize(bytes);
+     List<?> olist = (List<?>)o;
+
+     assertEquals(3, olist.size());
+     assertEquals(null, olist.get(0));
+     assertEquals(null, olist.get(1));
+     assertEquals(null, olist.get(2));
+
+     // assertEquals(o, struct); Cannot do this because types of null lists are wrong.
+
+   } catch (Throwable e) {
+     e.printStackTrace();
+     throw e;
+   }
+
+ }
+
+ /**
+  * Tests a null i32, an empty list and a null map with return nulls on:
+  * the two null fields must come back as null and the list must come back
+  * empty.
+  */
+ public void testNulls3() throws Throwable {
+   try {
+     // Build the struct: i32 = null, list = empty, map = null.
+     ArrayList<String> bye = new ArrayList<String> ();
+     HashMap<String, Integer> another = null;
+     ArrayList<Object> struct = new ArrayList<Object>();
+     struct.add(null);
+     struct.add(bye);
+     struct.add(another);
+
+     Properties schema = new Properties();
+     schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+     schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+     schema.setProperty(Constants.SERIALIZATION_DDL,
+       "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+     // Plain class name; Class.toString() would prepend "class ".
+     schema.setProperty(Constants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
+     schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
+
+     DynamicSerDe serde = new DynamicSerDe();
+     serde.initialize(new Configuration(), schema);
+
+     ObjectInspector oi = serde.getObjectInspector();
+
+     // Try to serialize
+     BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+     // Print the dump; calling hexString() alone would discard it.
+     System.out.println("bytes =" + hexString(bytes));
+
+     // Try to deserialize
+     Object o = serde.deserialize(bytes);
+     List<?> olist = (List<?>)o;
+
+     assertEquals(3, olist.size());
+     assertEquals(null, olist.get(0));
+     assertEquals(0, ((List<?>)olist.get(1)).size());
+     assertEquals(null, olist.get(2));
+
+     // assertEquals(o, struct); Cannot do this because types of null lists are wrong.
+
+   } catch (Throwable e) {
+     e.printStackTrace();
+     throw e;
+   }
+
+ }
+
+
+ /**
+  * Tests a null i32, an empty list and a null map with return nulls
+  * *off*: the i32 must come back as 0 and both the list and the map must
+  * come back as empty collections.
+  */
+ public void testNulls4() throws Throwable {
+   try {
+     // Build the struct: i32 = null, list = empty, map = null.
+     ArrayList<String> bye = new ArrayList<String> ();
+     HashMap<String, Integer> another = null;
+     ArrayList<Object> struct = new ArrayList<Object>();
+     struct.add(null);
+     struct.add(bye);
+     struct.add(another);
+
+     Properties schema = new Properties();
+     schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+     schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+     schema.setProperty(Constants.SERIALIZATION_DDL,
+       "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+     // Plain class name; Class.toString() would prepend "class ".
+     schema.setProperty(Constants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
+     schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "false");
+
+     DynamicSerDe serde = new DynamicSerDe();
+     serde.initialize(new Configuration(), schema);
+
+     ObjectInspector oi = serde.getObjectInspector();
+
+     // Try to serialize
+     BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+     // Print the dump; calling hexString() alone would discard it.
+     System.out.println("bytes =" + hexString(bytes));
+
+     // Try to deserialize
+     Object o = serde.deserialize(bytes);
+     List<?> olist = (List<?>)o;
+
+     assertEquals(3, olist.size());
+     assertEquals(Integer.valueOf(0), (Integer)olist.get(0));
+     List<?> num1 = (List<?>)olist.get(1);
+     assertEquals(0, num1.size());
+     Map<?,?> num2 = (Map<?,?>)olist.get(2);
+     assertEquals(0, num2.size());
+
+     // assertEquals(o, struct); Cannot do this because types of null lists are wrong.
+
+   } catch (Throwable e) {
+     e.printStackTrace();
+     throw e;
+   }
+
+ }
+
+
+ /**
+  * Tests a struct nested inside another struct: the outer struct (inner
+  * struct, i32, list, map) is serialized with TBinaryProtocol and every
+  * field, including the inner struct, must deserialize to an equal value.
+  */
+ public void testStructsinStructs() throws Throwable {
+   try {
+
+     Properties schema = new Properties();
+     // schema.setProperty(Constants.SERIALIZATION_FORMAT, com.facebook.thrift.protocol.TJSONProtocol.class.getName());
+     schema.setProperty(Constants.SERIALIZATION_FORMAT, com.facebook.thrift.protocol.TBinaryProtocol.class.getName());
+     schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+     schema.setProperty(Constants.SERIALIZATION_DDL,
+       "struct inner { i32 field1, string field2 },struct test {inner foo, i32 hello, list<string> bye, map<string,i32> another}");
+     // Plain class name; Class.toString() would prepend "class ".
+     schema.setProperty(Constants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
+
+     //
+     // construct object of above type
+     //
+
+     // construct the inner struct
+     ArrayList<Object> innerStruct = new ArrayList<Object>();
+     innerStruct.add(Integer.valueOf(22));
+     innerStruct.add("hello world");
+
+     // construct outer struct
+     ArrayList<String> bye = new ArrayList<String> ();
+     bye.add("firstString");
+     bye.add("secondString");
+     HashMap<String, Integer> another = new HashMap<String, Integer>();
+     another.put("firstKey", 1);
+     another.put("secondKey", 2);
+
+     ArrayList<Object> struct = new ArrayList<Object>();
+
+     struct.add(innerStruct);
+     struct.add(Integer.valueOf(234));
+     struct.add(bye);
+     struct.add(another);
+
+     DynamicSerDe serde = new DynamicSerDe();
+     serde.initialize(new Configuration(), schema);
+
+     ObjectInspector oi = serde.getObjectInspector();
+
+     // Try to serialize
+     BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+     // Try to deserialize
+     Object o = serde.deserialize(bytes);
+     List<?> olist = (List<?>)o;
+
+     assertEquals(4, olist.size());
+     assertEquals(innerStruct, olist.get(0));
+     assertEquals(Integer.valueOf(234), olist.get(1));
+     assertEquals(bye, olist.get(2));
+     assertEquals(another, olist.get(3));
+
+   } catch (Throwable e) {
+     e.printStackTrace();
+     throw e;
+   }
+
+ }
+
+
+
+
+ /**
+  * Tests skipping a field on read. The struct is serialized with
+  * TCTLSeparatedProtocol using custom delimiters and the text is checked;
+  * the serde is then re-initialized with a DDL that marks the list field
+  * as "skip", and the deserialized struct must carry null in that position
+  * while the other fields are unchanged.
+  */
+ public void testSkip() throws Throwable {
+   try {
+
+     // Try to construct an object
+     ArrayList<String> bye = new ArrayList<String>();
+     bye.add("firstString");
+     bye.add("secondString");
+     HashMap<String, Integer> another = new HashMap<String, Integer>();
+     another.put("firstKey", 1);
+     another.put("secondKey", 2);
+     ArrayList<Object> struct = new ArrayList<Object>();
+     struct.add(Integer.valueOf(234));
+     struct.add(bye);
+     struct.add(another);
+
+     Properties schema = new Properties();
+     schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+     schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+     schema.setProperty(Constants.SERIALIZATION_DDL,
+       "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+     // Plain class name; Class.toString() would prepend "class ".
+     schema.setProperty(Constants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
+
+     schema.setProperty(Constants.FIELD_DELIM, "9");
+     schema.setProperty(Constants.COLLECTION_DELIM, "1");
+     schema.setProperty(Constants.LINE_DELIM, "2");
+     schema.setProperty(Constants.MAPKEY_DELIM, "4");
+
+     DynamicSerDe serde = new DynamicSerDe();
+     serde.initialize(new Configuration(), schema);
+
+     TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol)serde.oprot_;
+     assertTrue(prot.getPrimarySeparator().equals("\u0009"));
+
+     ObjectInspector oi = serde.getObjectInspector();
+
+     // Try to serialize
+     BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+     // Print the dump; calling hexString() alone would discard it.
+     System.out.println("bytes =" + hexString(bytes));
+
+     String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
+
+     System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()) + ">");
+     System.out.println("compare to =" + compare + ">");
+
+     assertTrue(compare.equals( new String(bytes.get(), 0, bytes.getSize())));
+
+     // Re-initialize the same serde with the list field marked as skipped.
+     schema.setProperty(Constants.SERIALIZATION_DDL,
+       "struct test { i32 hello, skip list<string> bye, map<string,i32> another}");
+
+     serde.initialize(new Configuration(), schema);
+
+     // Try to deserialize
+     Object o = serde.deserialize(bytes);
+     System.out.println("o class = " + o.getClass());
+     List<?> olist = (List<?>)o;
+     System.out.println("o size = " + olist.size());
+     System.out.println("o = " + o);
+
+     assertEquals(null, olist.get(1));
+
+     // set the skipped field to null
+     struct.set(1,null);
+
+     // Expected value first, so a failure message labels the two sides correctly.
+     assertEquals(struct, o);
+
+   } catch (Throwable e) {
+     e.printStackTrace();
+     throw e;
+   }
+
+ }
+
}
Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java?rev=712905&r1=712904&r2=712905&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java Mon Nov 10 17:50:06 2008
@@ -34,7 +34,7 @@
assertEquals(oi1, oi2);
assertEquals(Category.PRIMITIVE, oi1.getCategory());
assertEquals(c, oi1.getPrimitiveClass());
- assertEquals(ObjectInspectorUtils.getClassShortName(c.getName()),
+ assertEquals(ObjectInspectorUtils.getClassShortName(c),
oi1.getTypeName());
} catch (Throwable e) {
e.printStackTrace();