You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2008/10/21 20:11:18 UTC
svn commit: r706704 [23/23] - in /hadoop/core/trunk: ./ src/contrib/hive/
src/contrib/hive/bin/
src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/
src/contrib/hive/common/src/java/org/apache/hadoop/hive/conf/
src/contrib/hive/conf/ src/contrib/h...
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt?rev=706704&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt Tue Oct 21 11:11:05 2008
@@ -0,0 +1,866 @@
+options {
+ MULTI=true;
+ STATIC = false;
+ NODE_PREFIX = "DynamicSerDe";
+}
+
+
+PARSER_BEGIN(thrift_grammar)
+
+package org.apache.hadoop.hive.serde2.dynamic_type;
+
+import java.util.*;
+import java.io.*;
+import java.net.*;
+import com.facebook.thrift.protocol.*;
+import com.facebook.thrift.transport.*;
+import org.apache.hadoop.hive.serde2.dynamic_type.*;
+
+public class thrift_grammar {
+
+    // search path used to resolve thrift include directives
+    private List<String> include_path = null;
+
+    // for computing the autogenerated field ids in thrift (negative, counting down)
+    private int field_val;
+
+    // store types and tables
+    // separately because one cannot use a table (ie service.method) as a Struct like type.
+    protected Map<String,DynamicSerDeSimpleNode> types;
+    protected Map<String,DynamicSerDeSimpleNode> tables;
+
+    // system include path
+    final private static String default_include_path[] = { "/usr/local/include","/usr/include","/usr/local/include/thrift/if","/usr/local/include/fb303/if" };
+
+    // need three params to differentiate between this and 2 param method auto generated since
+    // some calls in the autogenerated code use null param for 2nd param and thus ambiguous.
+    protected thrift_grammar(InputStream is, List<String> include_path, boolean junk) {
+        this(is,null);
+        this.types = new HashMap<String,DynamicSerDeSimpleNode>();
+        this.tables = new HashMap<String,DynamicSerDeSimpleNode>();
+        this.include_path = include_path;
+        this.field_val = -1;
+    }
+
+    /**
+     * Find the named file on the include path.
+     *
+     * @param fname the file name from the include directive
+     * @param include_path directories to search, in order
+     * @return the File if it exists on the path, otherwise null
+     */
+    private static File findFile(String fname, List<String> include_path) {
+        for(String path: include_path) {
+            File f = new File(path + "/" + fname);
+            if(f.exists()) {
+                return f;
+            }
+        }
+        return null;
+    }
+
+    public static void main(String args[]) {
+        String filename = null;
+        List<String> include_path = new ArrayList<String>();
+
+        for(String path: default_include_path) {
+            include_path.add(path);
+        }
+        for(int i = 0; i < args.length; i++) {
+            String arg = args[i];
+            if(arg.equals("--include") && i + 1 < args.length) {
+                include_path.add(args[++i]);
+            }
+            if(arg.equals("--file") && i + 1 < args.length) {
+                filename = args[++i];
+            }
+        }
+
+        InputStream is = System.in;
+        if(filename != null) {
+            // bug fix: findFile may return null (NPE in FileInputStream) and the
+            // IOException was silently swallowed, so a bad --file argument made
+            // the parser silently fall back to reading stdin. Report and exit.
+            File f = findFile(filename, include_path);
+            if(f == null) {
+                System.err.println("Could not find " + filename + " on include path " + include_path);
+                return;
+            }
+            try {
+                is = new FileInputStream(f);
+            } catch(IOException e) {
+                System.err.println("Could not open " + f + ": " + e.getMessage());
+                return;
+            }
+        }
+        thrift_grammar t = new thrift_grammar(is,include_path,false);
+
+        try {
+            t.Start();
+        } catch (Exception e) {
+            System.out.println("Parse error.");
+            System.out.println(e.getMessage());
+            e.printStackTrace();
+        }
+    }
+}
+
+PARSER_END(thrift_grammar)
+
+
+
+SKIP :
+{
+ " "
+| "\t"
+| "\n"
+| "\r"
+| <"#"(~["\n"])* ("\n"|"\r"|"\r\n")>
+| <"//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")>
+| <"/*" (~["*"])* "*" (~["/"] (~["*"])* "*")* "/">
+}
+
+
+/**
+ * HELPER DEFINITIONS, COMMENTS, CONSTANTS, AND WHATNOT
+ */
+
+TOKEN:
+{
+<tok_const: "const">|
+<tok_namespace : "namespace"> |
+<tok_cpp_namespace: "cpp_namespace">|
+<tok_cpp_include : "cpp_include">|
+<tok_cpp_type: "cpp_type">|
+<tok_java_package : "java_package">|
+<tok_cocoa_prefix: "cocoa_prefix">|
+<tok_csharp_namespace: "csharp_namespace">|
+<tok_php_namespace: "php_namespace">|
+<tok_py_module: "py_module">|
+<tok_perl_package: "perl_package">|
+<tok_ruby_namespace: "ruby_namespace">|
+<tok_smalltalk_category: "smalltalk_category">|
+<tok_smalltalk_prefix: "smalltalk_prefix">|
+<tok_xsd_all: "xsd_all">|
+<tok_xsd_optional: "xsd_optional">|
+<tok_xsd_nillable: "xsd_nillable">|
+<tok_xsd_namespace: "xsd_namespace">|
+<tok_xsd_attrs: "xsd_attrs">|
+<tok_include : "include">|
+<tok_void : "void">|
+<tok_bool : "bool">|
+<tok_byte: "byte">|
+<tok_i16: "i16">|
+<tok_i32: "i32">|
+<tok_i64: "i64">|
+<tok_double: "double">|
+<tok_string: "string">|
+<tok_slist : "slist">|
+<tok_senum: "senum">|
+<tok_map: "map"> |
+<tok_list: "list"> |
+<tok_set: "set"> |
+<tok_async: "async"> |
+<tok_typedef: "typedef"> |
+<tok_struct: "struct"> |
+<tok_exception: "exception"> |
+<tok_extends: "extends"> |
+<tok_throws: "throws"> |
+<tok_service: "service"> |
+<tok_enum: "enum"> |
+<tok_required: "required"> |
+<tok_optional: "optional">
+}
+
+TOKEN: {
+
+// integer literal with an optional sign
+<tok_int_constant : (["+","-"])?(["0"-"9"])+>
+|
+// floating point literal. bug fix: the sign was mandatory, so an unsigned
+// literal such as 1.5 could not be tokenized; it is now optional (JavaCC
+// longest-match still prefers the double over the int prefix "1").
+<tok_double_constant: (["+","-"])?(<DIGIT>)*"."(<DIGIT>)+(["e","E"](["+","-"])?(<DIGIT>)+)?>
+|
+<IDENTIFIER: <LETTER>(<LETTER>|<DIGIT>|"."|"_")*>
+|
+<#LETTER: (["a"-"z", "A"-"Z" ]) >
+|
+<#DIGIT: ["0"-"9"] >
+|
+// single- or double-quoted string literal (no escape handling)
+<tok_literal: "\""(~["\""])*"\""|"'"(~["'"])*"'">
+|
+<tok_st_identifier: ["a"-"z","A"-"Z","-"]([".","a"-"z","A"-"Z","_","0"-"9","-"])*>
+}
+
+
+SimpleNode Start() : {}
+{
+ HeaderList() (Definition())+
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode HeaderList() : {}
+{
+ (Header())*
+ {
+ return jjtThis;
+ }
+
+}
+
+SimpleNode Header() : {}
+{
+ Include()
+ {
+ return jjtThis;
+ }
+| Namespace()
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode Namespace() : {}
+{
+ <tok_namespace> <IDENTIFIER> <IDENTIFIER>
+{
+ return jjtThis;
+}
+|
+<tok_cpp_namespace> <IDENTIFIER>
+{
+ return jjtThis;
+}
+|
+<tok_cpp_include> <tok_literal>
+{
+ return jjtThis;
+}
+|
+<tok_php_namespace> <IDENTIFIER>
+{
+ return jjtThis;
+}
+|
+<tok_py_module> <IDENTIFIER>
+{
+ return jjtThis;
+}
+|
+<tok_perl_package> <IDENTIFIER>
+{
+ return jjtThis;
+}
+|
+<tok_ruby_namespace> <IDENTIFIER>
+{
+ return jjtThis;
+}
+|
+<tok_smalltalk_category> <tok_st_identifier>
+{
+ return jjtThis;
+}
+|
+<tok_smalltalk_prefix> <IDENTIFIER>
+{
+ return jjtThis;
+}
+|
+<tok_java_package> <IDENTIFIER>
+{
+ return jjtThis;
+}
+|
+<tok_cocoa_prefix> <IDENTIFIER>
+{
+ return jjtThis;
+}
+|
+<tok_xsd_namespace> <tok_literal>
+{
+ return jjtThis;
+}
+|
+<tok_csharp_namespace> <IDENTIFIER>
+{
+ return jjtThis;
+}
+}
+
+
+// Parse an "include" directive: locate the file on the include path, parse it
+// recursively, and merge its types/tables into this parser's maps.
+// Throws RuntimeException if the file cannot be found; a parse failure inside
+// the included file is reported but not fatal (best-effort, as before).
+SimpleNode Include() : {
+    String fname;
+    boolean found = false;
+}
+{
+    <tok_include>
+    fname=<tok_literal>.image
+{
+    // strip the surrounding quote characters from the literal
+    // (fragile: assumes exactly one quote char at each end)
+    fname = fname.substring(1,fname.length() - 1);
+
+    // try to find the file on the include path
+    File f = thrift_grammar.findFile(fname, this.include_path);
+    if(f != null) {
+        found = true;
+        try {
+            FileInputStream fis = new FileInputStream(f);
+            try {
+                thrift_grammar t = new thrift_grammar(fis,this.include_path, false);
+                t.Start();
+                // add in what we found to our type and table tables.
+                this.tables.putAll(t.tables);
+                this.types.putAll(t.types);
+            } finally {
+                // bug fix: was only closed on success, leaking the stream
+                // when Start() threw
+                fis.close();
+            }
+        } catch (Exception e) {
+            System.out.println("File: " + fname + " - Oops.");
+            System.out.println(e.getMessage());
+            e.printStackTrace();
+        }
+    }
+    if(!found) {
+        throw new RuntimeException("include file not found: " + fname);
+    }
+    return jjtThis;
+}
+}
+
+
+SimpleNode Definition() : {}
+{
+ Const()
+ {
+ return jjtThis;
+ }
+| Service()
+ {
+ return jjtThis;
+ }
+| TypeDefinition()
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode TypeDefinition() : {}
+{
+ Typedef()
+ {
+ return jjtThis;
+ }
+| Enum()
+ {
+ return jjtThis;
+ }
+| Senum()
+ {
+ return jjtThis;
+ }
+| Struct()
+ {
+ return jjtThis;
+ }
+| Xception()
+ {
+ return jjtThis;
+ }
+
+}
+
+DynamicSerDeTypedef Typedef() : {}
+{
+ <tok_typedef>
+ DefinitionType()
+ jjtThis.name = <IDENTIFIER>.image
+ {
+ // store the type for later retrieval
+ this.types.put(jjtThis.name, jjtThis);
+ return jjtThis;
+ }
+}
+
+
+// returning void because we ignore this production.
+void CommaOrSemicolon() : {}
+{
+ ","
+|
+ ";"
+{
+}
+}
+
+SimpleNode Enum() : {}
+{
+ <tok_enum> <IDENTIFIER> "{" EnumDefList() "}"
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode EnumDefList() : {}
+{
+ (EnumDef())+
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode EnumDef() : {}
+{
+ <IDENTIFIER> ["=" <tok_int_constant>] [CommaOrSemicolon()]
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode Senum() : {}
+{
+ <tok_senum> <IDENTIFIER> "{" SenumDefList() "}"
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode SenumDefList() : {}
+{
+ (SenumDef())+
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode SenumDef() : {}
+{
+ <tok_literal> [CommaOrSemicolon()]
+ {
+ return jjtThis;
+ }
+}
+
+
+SimpleNode Const() : {}
+{
+ <tok_const> FieldType() <IDENTIFIER> "=" ConstValue() [CommaOrSemicolon()]
+ {
+ return jjtThis;
+ }
+}
+
+// A constant value: integer, double, string literal, identifier reference,
+// list literal or map literal.
+// bug fix: previously only the ConstMap alternative returned jjtThis; the
+// other branches had empty actions and fell through without a return value.
+SimpleNode ConstValue() : {}
+{
+    <tok_int_constant>
+    {
+        return jjtThis;
+    }
+| <tok_double_constant>
+    {
+        return jjtThis;
+    }
+| <tok_literal>
+    {
+        return jjtThis;
+    }
+| <IDENTIFIER>
+    {
+        return jjtThis;
+    }
+| ConstList()
+    {
+        return jjtThis;
+    }
+| ConstMap()
+    {
+        return jjtThis;
+    }
+}
+
+SimpleNode ConstList() : {}
+{
+ "[" ConstListContents() "]"
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode ConstListContents() : {}
+{
+ (ConstValue() [CommaOrSemicolon()])+
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode ConstMap() : {}
+{
+ "{" ConstMapContents() "}"
+ {
+ return jjtThis;
+ }
+}
+
+// The (possibly empty) key:value entries of a const map literal.
+// bug fix: the non-empty alternative had an empty action and did not return
+// jjtThis; now both branches return the node.
+SimpleNode ConstMapContents() : {}
+{
+    (ConstValue() ":" ConstValue() [CommaOrSemicolon()])+
+    {
+        return jjtThis;
+    }
+|
+    // empty map
+    {
+        return jjtThis;
+    }
+}
+
+DynamicSerDeStruct Struct() : {
+
+}
+{
+ <tok_struct>
+ jjtThis.name = <IDENTIFIER>.image
+ "{"
+ FieldList()
+ "}"
+ {
+ this.types.put(jjtThis.name,jjtThis);
+ return jjtThis;
+ }
+}
+
+
+SimpleNode Xception() : {}
+{
+ <tok_exception> <IDENTIFIER> "{" FieldList() "}"
+ {
+ return jjtThis;
+ }
+}
+
+
+SimpleNode Service() : {}
+{
+ <tok_service>
+ <IDENTIFIER>
+ Extends()
+ "{"
+ FlagArgs()
+ (Function())+
+ UnflagArgs()
+ "}"
+ {
+ // at some point, these should be inserted as a "db"
+ return jjtThis;
+ }
+}
+
+SimpleNode FlagArgs() : {}
+{
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode UnflagArgs() : {}
+{
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode Extends() : {}
+{
+ <tok_extends> <IDENTIFIER>
+ {
+ return jjtThis;
+ }
+|
+ {
+ return jjtThis;
+ }
+}
+
+
+DynamicSerDeFunction Function() : {}
+{
+ // metastore ignores async and type
+ Async()
+ FunctionType()
+
+ // the name of the function/table
+ jjtThis.name = <IDENTIFIER>.image
+ "("
+ FieldList()
+ ")"
+ Throws()
+ [CommaOrSemicolon()]
+
+ {
+ this.tables.put(jjtThis.name, jjtThis);
+ return jjtThis;
+ }
+}
+
+void Async() : {}
+{
+ <tok_async>
+|
+{}
+}
+
+void Throws() : {}
+{
+ <tok_throws> "(" FieldList() ")"
+|
+{}
+}
+
+
+// nothing special - just use the DynamicSerDeFieldList's children methods to access the fields
+DynamicSerDeFieldList FieldList() : {
+ this.field_val = -1;
+}
+{
+ (Field())* {
+ return jjtThis;
+ }
+}
+
+
+// A single struct/parameter field: "[id:] [requiredness] type name [= value] [,|;]".
+// Explicit field ids are taken from the IDL; otherwise negative ids are
+// auto-assigned counting down, thrift style.
+DynamicSerDeField Field() : {
+    // textual field id, e.g. the "1" in "1: i32 foo"; empty if absent
+    // (removed unused local "fid")
+    String fidnum = "";
+}
+{
+
+    // parse the field id which is optional
+    [fidnum=<tok_int_constant>.image ":"]
+
+    // is this field required or optional? default is optional
+    FieldRequiredness()
+
+    // field type - obviously not optional
+    FieldType()
+
+    // the name of the field - not optional
+    jjtThis.name = <IDENTIFIER>.image
+
+    // does it have = some value?
+    FieldValue()
+
+    // take it or leave it
+    [CommaOrSemicolon()]
+
+    {
+        if(fidnum.length() > 0) {
+            // explicit id given in the IDL (parseInt avoids needless boxing)
+            jjtThis.fieldid = Integer.parseInt(fidnum);
+        } else {
+            // auto-assign the next negative id
+            jjtThis.fieldid = this.field_val--;
+        }
+        return jjtThis;
+    }
+}
+
+
+
+SimpleNode FieldRequiredness() : {}
+{
+ <tok_required>
+ {
+ return jjtThis;
+ }
+| <tok_optional>
+ {
+ return jjtThis;
+ }
+|
+ {
+ return jjtThis;
+ }
+}
+
+SimpleNode FieldValue() : {}
+{
+ "="
+ ConstValue()
+ {
+ return jjtThis;
+ }
+|
+{
+ return jjtThis;
+}
+}
+
+SimpleNode DefinitionType() : {}
+{
+// BaseType() xxx
+ TypeString()
+ {
+ return jjtThis;
+ }
+| TypeBool()
+ {
+ return jjtThis;
+ }
+| Typei16()
+ {
+ return jjtThis;
+ }
+| Typei32()
+ {
+ return jjtThis;
+ }
+| Typei64()
+ {
+ return jjtThis;
+ }
+| TypeDouble()
+ {
+ return jjtThis;
+ }
+| TypeMap()
+ {
+ return jjtThis;
+ }
+| TypeSet()
+ {
+ return jjtThis;
+ }
+| TypeList()
+ {
+ return jjtThis;
+ }
+}
+
+void FunctionType() : {}
+{
+ FieldType()
+| <tok_void>
+{}
+}
+
+DynamicSerDeFieldType FieldType() : {
+}
+
+{
+ TypeString()
+ {
+ return jjtThis;
+ }
+| TypeBool()
+ {
+ return jjtThis;
+ }
+| Typei16()
+ {
+ return jjtThis;
+ }
+| Typei32()
+ {
+ return jjtThis;
+ }
+| Typei64()
+ {
+ return jjtThis;
+ }
+| TypeDouble()
+ {
+ return jjtThis;
+ }
+|
+ TypeMap()
+ {
+ return jjtThis;
+ }
+|
+ TypeSet()
+ {
+ return jjtThis;
+ }
+|
+ TypeList()
+ {
+ return jjtThis;
+ }
+|
+ jjtThis.name = <IDENTIFIER>.image
+ {
+ return jjtThis;
+ }
+}
+
+DynamicSerDeTypeString TypeString() : {}
+{
+ <tok_string>
+ {
+ return jjtThis;
+ }
+}
+
+DynamicSerDeTypeByte TypeByte() : {
+}
+{
+ <tok_byte>
+ {
+ return jjtThis;
+ }
+}
+
+DynamicSerDeTypei16 Typei16() : {
+}
+{
+ <tok_i16>
+ {
+ return jjtThis;
+ }
+}
+
+DynamicSerDeTypei32 Typei32() : {}
+{
+ <tok_i32>
+ {
+ return jjtThis;
+ }
+}
+
+DynamicSerDeTypei64 Typei64() : {}
+{
+ <tok_i64>
+ {
+ return jjtThis;
+ }
+}
+
+DynamicSerDeTypeDouble TypeDouble() : {}
+{
+ <tok_double>
+ {
+ return jjtThis;
+ }
+}
+
+DynamicSerDeTypeBool TypeBool() : {}
+{
+ <tok_bool>
+ {
+ return jjtThis;
+ }
+}
+
+DynamicSerDeTypeMap TypeMap() : {}
+{
+ <tok_map>
+ "<"
+ FieldType()
+ ","
+ FieldType()
+ ">"
+ {
+ return jjtThis;
+ }
+}
+
+DynamicSerDeTypeSet TypeSet() : {}
+{
+ <tok_set>
+ "<"
+
+ FieldType()
+
+ ">"
+ {
+ return jjtThis;
+ }
+}
+
+DynamicSerDeTypeList TypeList() : {}
+{
+ <tok_list>
+ "<"
+
+ FieldType()
+
+ ">"
+ {
+ return jjtThis;
+ }
+}
Modified: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java?rev=706704&r1=706703&r2=706704&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java Tue Oct 21 11:11:05 2008
@@ -116,11 +116,16 @@
if (data == null) {
return null;
}
+ if (!(fieldRef instanceof MyField)) {
+ throw new RuntimeException("fieldRef has to be of MyField");
+ }
+ MyField f = (MyField) fieldRef;
try {
- MyField f = (MyField) fieldRef;
- return f.field.get(data);
+ Object r = f.field.get(data);
+ return r;
} catch (Exception e) {
- throw new RuntimeException(e);
+ throw new RuntimeException("cannot get field " + f.field + " from "
+ + data.getClass() + " " + data);
}
}
public List<Object> getStructFieldsDataAsList(Object data) {
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java?rev=706704&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java Tue Oct 21 11:11:05 2008
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.thrift;
+
+import org.apache.hadoop.conf.Configuration;
+import java.util.Properties;
+import com.facebook.thrift.TException;
+
+/**
+ * Implemented by TProtocols that must be handed table/job properties before
+ * use - e.g., TCTLSeparatedProtocol needs its separator characters. A
+ * regex-driven deserializer could receive its pattern the same way.
+ */
+public interface ConfigurableTProtocol {
+
+  /**
+   * Initialize the TProtocol.
+   *
+   * @param conf system properties
+   * @param tbl  table properties
+   * @throws TException on initialization failure
+   */
+  void initialize(Configuration conf, Properties tbl) throws TException;
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java?rev=706704&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java Tue Oct 21 11:11:05 2008
@@ -0,0 +1,576 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.hadoop.hive.serde2.thrift;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde.Constants;
+import com.facebook.thrift.TException;
+import com.facebook.thrift.transport.*;
+import com.facebook.thrift.*;
+import com.facebook.thrift.protocol.*;
+import java.util.*;
+import java.util.regex.Pattern;
+import java.io.*;
+import org.apache.hadoop.conf.Configuration;
+import java.util.Properties;
+
+/**
+ *
+ * An implementation of the Thrift Protocol for ctl separated
+ * records.
+ * This is not thrift compliant in that it doesn't write out field ids
+ * so things cannot actually be versioned.
+ */
+public class TCTLSeparatedProtocol extends TProtocol implements ConfigurableTProtocol {
+
+ final static Log LOG = LogFactory.getLog(TCTLSeparatedProtocol.class.getName());
+
+ /**
+ * Factory for JSON protocol objects
+ */
+ public static class Factory implements TProtocolFactory {
+
+ public TProtocol getProtocol(TTransport trans) {
+ return new TCTLSeparatedProtocol(trans);
+ }
+
+ }
+
+ /**
+ * These are defaults, but for now leaving them like this
+ */
+ final static protected byte defaultPrimarySeparatorByte = 1;
+ final static protected byte defaultSecondarySeparatorByte = 2;
+ final static protected byte defaultRowSeparatorByte = (byte)'\n';
+ final static protected byte defaultMapSeparatorByte = 3;
+
+ /**
+ * The separators for this instance
+ */
+ protected byte primarySeparatorByte;
+ protected byte secondarySeparatorByte;
+ protected byte rowSeparatorByte;
+ protected byte mapSeparatorByte;
+ protected Pattern primaryPattern;
+ protected Pattern secondaryPattern;
+ protected Pattern mapPattern;
+
+ /**
+ * Inspect the separators this instance is configured with.
+ */
+ public byte getPrimarySeparator() { return primarySeparatorByte; }
+ public byte getSecondarySeparator() { return secondarySeparatorByte; }
+ public byte getRowSeparator() { return rowSeparatorByte; }
+ public byte getMapSeparator() { return mapSeparatorByte; }
+
+
+ /**
+ * The transport stream is tokenized on the row separator
+ */
+ protected SimpleTransportTokenizer transportTokenizer;
+
+ /**
+ * For a single row, the split on the primary separator
+ */
+ protected String columns[];
+
+ /**
+ * An index into what column we're on
+ */
+
+ protected int index;
+
+ /**
+ * For a single column, a split on the secondary separator
+ */
+ protected String fields[];
+
+ /**
+ * An index into what field within a column we're on
+ */
+ protected int innerIndex;
+
+
+ /**
+ * Is this the first field we're writing
+ */
+ protected boolean firstField;
+
+ /**
+ * Is this the first list/map/set field we're writing for the current element
+ */
+ protected boolean firstInnerField;
+
+
+ /**
+ * Are we writing a map and need to worry about k/v separator?
+ */
+ protected boolean isMap;
+
+
+ /**
+ * For writes, on what element are we on so we know when to use normal list separator or
+ * for a map know when to use the k/v separator
+ */
+ protected long elemIndex;
+
+
+ /**
+ * Are we currently on the top-level columns or parsing a column itself
+ */
+ protected boolean inner;
+
+
+ /**
+ * For places where the separators are back to back, should we return a null or an empty string since it is ambiguous.
+ * This also applies to extra columns that are read but aren't in the current record.
+ */
+ protected boolean returnNulls;
+
+ /**
+ * The transport being wrapped.
+ *
+ */
+ final protected TTransport innerTransport;
+
+
+ /**
+ * Strings used to lookup the various configurable paramaters of this protocol.
+ */
+ public final static String ReturnNullsKey = "separators.return_nulls";
+ public final static String BufferSizeKey = "separators.buffer_size";
+
+ /**
+ * The size of the internal buffer to use.
+ */
+ protected int bufferSize;
+
+ /**
+ * A convenience class for tokenizing a TTransport
+ */
+
+ // Splits the wrapped TTransport's byte stream into row tokens on a single
+ // separator byte, refilling an internal buffer as needed. Tokens that span
+ // a buffer refill are concatenated before being returned.
+ class SimpleTransportTokenizer {
+
+ // transport we read raw bytes from
+ TTransport trans;
+ // tokenizer over the current buffer's contents; separators are returned as tokens
+ StringTokenizer tokenizer;
+ // the row separator as a one-character String
+ final String separator;
+ // read buffer; its size bounds how much is pulled from the transport per refill
+ byte buf[];
+
+ public SimpleTransportTokenizer(TTransport trans, byte separator, int buffer_length) {
+ this.trans = trans;
+ byte [] separators = new byte[1];
+ separators[0] = separator;
+ // NOTE(review): byte<->String conversions here use the platform default
+ // charset - presumably ASCII separators are assumed; confirm.
+ this.separator = new String(separators);
+ buf = new byte[buffer_length];
+ fillTokenizer();
+ }
+
+ // Refill buf from the transport and rebuild the tokenizer over it.
+ // Returns false when the transport is exhausted (tokenizer left empty).
+ private boolean fillTokenizer() {
+ try {
+ int length = trans.read(buf, 0, buf.length);
+ if(length <=0 ) {
+ tokenizer = new StringTokenizer("", separator, true);
+ return false;
+ }
+ String row = new String(buf, 0, length);
+ tokenizer = new StringTokenizer(row, new String(separator), true);
+ } catch(TTransportException e) {
+ e.printStackTrace();
+ tokenizer = null;
+ return false;
+ }
+ return true;
+ }
+
+ // Return the next row (text up to the next separator), or null when the
+ // stream ends exactly on a separator boundary. Throws EOFException when
+ // the stream is exhausted before any token text is seen.
+ public String nextToken() throws EOFException {
+ StringBuffer ret = null;
+ boolean done = false;
+
+ while(! done) {
+
+ if(! tokenizer.hasMoreTokens()) {
+ if(! fillTokenizer()) {
+ break;
+ }
+ }
+
+ try {
+ final String nextToken = tokenizer.nextToken();
+
+ if(nextToken.equals(separator)) {
+ // hit the row separator: current token is complete
+ done = true;
+ } else if(ret == null) {
+ ret = new StringBuffer(nextToken);
+ } else {
+ // token continued across a buffer refill; append the remainder
+ ret.append(nextToken);
+ }
+ } catch(NoSuchElementException e) {
+ if (ret == null) {
+ throw new EOFException(e.getMessage());
+ }
+ done = true;
+ }
+ } // while ! done
+ return ret == null ? null : ret.toString();
+ }
+ };
+
+
+ /**
+ * The simple constructor which assumes ctl-a, ctl-b and '\n' separators and to return empty strings for empty fields.
+ *
+ * @param trans - the ttransport to use as input or output
+ *
+ */
+
+ public TCTLSeparatedProtocol(TTransport trans) {
+ this(trans, defaultPrimarySeparatorByte, defaultSecondarySeparatorByte, defaultMapSeparatorByte, defaultRowSeparatorByte, false, 4096);
+ }
+
+ public TCTLSeparatedProtocol(TTransport trans, int buffer_size) {
+ this(trans, defaultPrimarySeparatorByte, defaultSecondarySeparatorByte, defaultMapSeparatorByte, defaultRowSeparatorByte, false, buffer_size);
+ }
+
+  /**
+   * Fully-parameterized constructor.
+   *
+   * @param trans the ttransport to use as input or output
+   * @param primarySeparatorByte the separator between columns (aka fields)
+   * @param secondarySeparatorByte the separator within a field for things like sets and maps and lists
+   * @param mapSeparatorByte the key/value separator
+   * @param rowSeparatorByte the record separator
+   * @param returnNulls whether to return a null or an empty string for fields
+   *        that seem empty (ie two primary separators back to back)
+   * @param bufferSize size of the internal read buffer
+   */
+  public TCTLSeparatedProtocol(TTransport trans, byte primarySeparatorByte, byte secondarySeparatorByte, byte mapSeparatorByte, byte rowSeparatorByte,
+                               boolean returnNulls,
+                               int bufferSize) {
+    super(trans);
+
+    // bug fix: was "returnNulls = returnNulls" - a self-assignment to the
+    // parameter that silently discarded the caller's value, leaving the
+    // field at its default (false)
+    this.returnNulls = returnNulls;
+
+    this.primarySeparatorByte = primarySeparatorByte;
+    this.secondarySeparatorByte = secondarySeparatorByte;
+    this.rowSeparatorByte = rowSeparatorByte;
+    this.mapSeparatorByte = mapSeparatorByte;
+
+    this.innerTransport = trans;
+    this.bufferSize = bufferSize;
+
+    // build separator patterns and the row tokenizer
+    internalInitialize();
+  }
+
+
+ /**
+ * Sets the internal separator patterns and creates the internal tokenizer.
+ */
+ protected void internalInitialize() {
+ byte []primarySeparator = new byte[1];
+ byte []secondarySeparator = new byte[1];
+ primarySeparator[0] = primarySeparatorByte;
+ secondarySeparator[0] = secondarySeparatorByte;
+
+ primaryPattern = Pattern.compile(new String(primarySeparator));
+ secondaryPattern = Pattern.compile(new String(secondarySeparator));
+ mapPattern = Pattern.compile("\\0" + secondarySeparatorByte + "|\\0" + mapSeparatorByte);
+
+ transportTokenizer = new SimpleTransportTokenizer(innerTransport, rowSeparatorByte, bufferSize);
+ }
+
+ /**
+ * Initialize the TProtocol from table/job properties, overriding the
+ * separators and options set by the constructor, then rebuild the internal
+ * patterns and tokenizer.
+ *
+ * NOTE(review): separator properties are parsed with Byte.valueOf, so they
+ * must be the decimal numeric value of the delimiter byte (e.g. "9" for
+ * tab); a literal character such as "," would throw NumberFormatException
+ * here - confirm callers always pass numeric values.
+ *
+ * @param conf System properties
+ * @param tbl table properties
+ * @throws TException
+ */
+ public void initialize(Configuration conf, Properties tbl) throws TException {
+ primarySeparatorByte = Byte.valueOf(tbl.getProperty(Constants.FIELD_DELIM, String.valueOf(primarySeparatorByte))).byteValue();
+ LOG.debug("collections delim=<" + tbl.getProperty(Constants.COLLECTION_DELIM) + ">" );
+ secondarySeparatorByte = Byte.valueOf(tbl.getProperty(Constants.COLLECTION_DELIM, String.valueOf(secondarySeparatorByte))).byteValue();
+ rowSeparatorByte = Byte.valueOf(tbl.getProperty(Constants.LINE_DELIM, String.valueOf(rowSeparatorByte))).byteValue();
+ mapSeparatorByte = Byte.valueOf(tbl.getProperty(Constants.MAPKEY_DELIM, String.valueOf(mapSeparatorByte))).byteValue();
+ returnNulls = Boolean.valueOf(tbl.getProperty(ReturnNullsKey, String.valueOf(returnNulls))).booleanValue();
+ bufferSize = Integer.valueOf(tbl.getProperty(BufferSizeKey, String.valueOf(bufferSize))).intValue();
+
+ // recreate patterns/tokenizer with the (possibly) new separators
+ internalInitialize();
+
+ }
+
+ public void writeMessageBegin(TMessage message) throws TException {
+ }
+
+ public void writeMessageEnd() throws TException {
+ }
+
+ public void writeStructBegin(TStruct struct) throws TException {
+ firstField = true;
+ }
+
+ public void writeStructEnd() throws TException {
+ // We don't write rowSeparatorByte because that should be handled by file format.
+ }
+
+ public void writeFieldBegin(TField field) throws TException {
+ if(! firstField) {
+ writeByte(primarySeparatorByte);
+ }
+ firstField = false;
+ }
+
+ public void writeFieldEnd() throws TException {
+ }
+
+ public void writeFieldStop() {
+ }
+
+ public void writeMapBegin(TMap map) throws TException {
+ // nesting not allowed!
+ if(map.keyType == TType.STRUCT ||
+ map.keyType == TType.MAP ||
+ map.keyType == TType.LIST ||
+ map.keyType == TType.SET) {
+ throw new TException("Not implemented: nested structures");
+ }
+ // nesting not allowed!
+ if(map.valueType == TType.STRUCT ||
+ map.valueType == TType.MAP ||
+ map.valueType == TType.LIST ||
+ map.valueType == TType.SET) {
+ throw new TException("Not implemented: nested structures");
+ }
+
+ firstInnerField = true;
+ isMap = true;
+ inner = true;
+ elemIndex = 0;
+ }
+
+ public void writeMapEnd() throws TException {
+ isMap = false;
+ inner = false;
+ }
+
+ public void writeListBegin(TList list) throws TException {
+ if(list.elemType == TType.STRUCT ||
+ list.elemType == TType.MAP ||
+ list.elemType == TType.LIST ||
+ list.elemType == TType.SET) {
+ throw new TException("Not implemented: nested structures");
+ }
+ firstInnerField = true;
+ inner = true;
+ }
+
+ public void writeListEnd() throws TException {
+ inner = false;
+ }
+
+ public void writeSetBegin(TSet set) throws TException {
+ if(set.elemType == TType.STRUCT ||
+ set.elemType == TType.MAP ||
+ set.elemType == TType.LIST ||
+ set.elemType == TType.SET) {
+ throw new TException("Not implemented: nested structures");
+ }
+ firstInnerField = true;
+ inner = true;
+ }
+
+ public void writeSetEnd() throws TException {
+ inner = false;
+ }
+
+ public void writeBool(boolean b) throws TException {
+ writeString(String.valueOf(b));
+ }
+
+ // for writing out single byte
+ private byte buf[] = new byte[1];
+ public void writeByte(byte b) throws TException {
+ buf[0] = b;
+ trans_.write(buf);
+ }
+
+ public void writeI16(short i16) throws TException {
+ writeString(String.valueOf(i16));
+ }
+
+ public void writeI32(int i32) throws TException {
+ writeString(String.valueOf(i32));
+ }
+
+ public void writeI64(long i64) throws TException {
+ writeString(String.valueOf(i64));
+ }
+
+ public void writeDouble(double dub) throws TException {
+ writeString(String.valueOf(dub));
+ }
+
+ // Write one value. Inside a map/list/set (inner == true) a separator is
+ // emitted before every element after the first: for maps, elements
+ // alternate key/value, so the map separator precedes values (odd
+ // positions) and the secondary separator precedes keys.
+ public void writeString(String str) throws TException {
+ if(inner) {
+ if(!firstInnerField) {
+ // super hack city notice the mod plus only happens after firstfield hit, so == 0 is right.
+ if(isMap && elemIndex++ % 2 == 0) {
+ writeByte(mapSeparatorByte);
+ } else {
+ writeByte(secondarySeparatorByte);
+ }
+ } else {
+ firstInnerField = false;
+ }
+ }
+ // NOTE(review): getBytes() uses the platform default charset - confirm
+ // the data is assumed ASCII/UTF-8
+ final byte buf[] = str.getBytes();
+ trans_.write(buf, 0, buf.length);
+ }
+
+ public void writeBinary(byte[] bin) throws TException {
+ throw new TException("Ctl separated protocol cannot support writing Binary data!");
+ }
+
+ public TMessage readMessageBegin() throws TException {
+ return new TMessage();
+ }
+
+ public void readMessageEnd() throws TException {
+ }
+
+ public TStruct readStructBegin() throws TException {
+ assert(!inner);
+ try {
+ final String tmp = transportTokenizer.nextToken();
+ columns = primaryPattern.split(tmp);
+ index = 0;
+ return new TStruct();
+ } catch(EOFException e) {
+ return null;
+ }
+ }
+
+ public void readStructEnd() throws TException {
+ columns = null;
+ }
+
+ public TField readFieldBegin() throws TException {
+ assert( !inner);
+ TField f = new TField();
+ // slight hack to communicate to DynamicSerDe that the field ids are not being set but things are ordered.
+ f.type = -1;
+ return f;
+ }
+
+ public void readFieldEnd() throws TException {
+ fields = null;
+ }
+
+ public TMap readMapBegin() throws TException {
+ assert( !inner);
+ TMap map = new TMap();
+ fields = mapPattern.split(columns[index++]);
+ if(fields != null) {
+ map.size = fields.length/2;
+ } else {
+ map.size = 0;
+ }
+ innerIndex = 0;
+ inner = true;
+ isMap = true;
+ return map;
+ }
+
+ public void readMapEnd() throws TException {
+ inner = false;
+ isMap = false;
+ }
+
+ public TList readListBegin() throws TException {
+ assert( !inner);
+ TList list = new TList();
+ fields = secondaryPattern.split(columns[index++]);
+ if(fields != null) {
+ list.size = fields.length ;
+ } else {
+ list.size = 0;
+ }
+ innerIndex = 0;
+ inner = true;
+ return list;
+ }
+
+ public void readListEnd() throws TException {
+ inner = false;
+ }
+
+ public TSet readSetBegin() throws TException {
+ assert( !inner);
+ TSet set = new TSet();
+ fields = secondaryPattern.split(columns[index++]);
+ if(fields != null) {
+ set.size = fields.length ;
+ } else {
+ set.size = 0;
+ }
+ inner = true;
+ innerIndex = 0;
+ return set;
+ }
+
+ public void readSetEnd() throws TException {
+ inner = false;
+ }
+  /** Parse the next delimited token as a boolean. */
+  public boolean readBool() throws TException {
+    return Boolean.parseBoolean(readString());
+  }
+
+  /** Parse the next delimited token as a byte. */
+  public byte readByte() throws TException {
+    return Byte.parseByte(readString());
+  }
+
+  /** Parse the next delimited token as a short. */
+  public short readI16() throws TException {
+    return Short.parseShort(readString());
+  }
+
+  /** Parse the next delimited token as an int. */
+  public int readI32() throws TException {
+    return Integer.parseInt(readString());
+  }
+
+  /** Parse the next delimited token as a long. */
+  public long readI64() throws TException {
+    return Long.parseLong(readString());
+  }
+
+  /** Parse the next delimited token as a double. */
+  public double readDouble() throws TException {
+    return Double.parseDouble(readString());
+  }
+
+ // NOTE(review): curMapPair appears unused anywhere in this file - confirm
+ // before removing
+ protected String [] curMapPair;
+ // Read the next column (top level) or the next element of the current
+ // map/list/set (inner). When the value is missing or the record has fewer
+ // columns than requested, returns null if returnNulls is set, else "".
+ public String readString() throws TException {
+ String ret;
+ if(!inner) {
+ ret = columns != null && index < columns.length ? columns[index++] : null;
+ } else {
+ ret = fields != null && innerIndex < fields.length ? fields[innerIndex++] : null;
+ }
+ return ret == null && ! returnNulls ? "" : ret;
+ }
+
+ public byte[] readBinary() throws TException {
+ throw new TException("Not implemented for control separated data");
+ }
+}
Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java?rev=706704&r1=706703&r2=706704&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java Tue Oct 21 11:11:05 2008
@@ -31,7 +31,7 @@
public void testLookupSerDe() throws Exception {
try {
- String name = ThriftSerDe.shortName();
+ String name = ThriftSerDe.class.getName();
SerDe s = SerDeUtils.lookupSerDe(name);
assertTrue(s.getClass().getName().equals(org.apache.hadoop.hive.serde.thrift.ThriftSerDe.class.getName()));
} catch(Exception e) {
Added: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java?rev=706704&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java Tue Oct 21 11:11:05 2008
@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import junit.framework.TestCase;
+import java.io.*;
+import org.apache.hadoop.hive.serde2.*;
+import org.apache.hadoop.hive.serde2.thrift.*;
+import java.util.*;
+import com.facebook.thrift.TException;
+import com.facebook.thrift.transport.*;
+import com.facebook.thrift.*;
+import com.facebook.thrift.protocol.*;
+
+// Tests for TCTLSeparatedProtocol: writes and reads records whose columns,
+// collection elements and map key/value pairs are joined by control
+// characters (^A, ^B, ^C), using TMemoryBuffer as the transport and
+// deliberately tiny buffer sizes to exercise re-buffering.
+public class TestTCTLSeparatedProtocol extends TestCase {
+
+ public TestTCTLSeparatedProtocol() throws Exception {
+ }
+
+ // Builds a raw separated record by hand, then checks the protocol parses
+ // each column (including an intentionally empty one) and a 2-entry map.
+ public void testReads() throws Exception {
+ try {
+ TMemoryBuffer trans = new TMemoryBuffer(1024);
+ String foo = "Hello";
+ String bar = "World!";
+
+ String key = "22";
+ String value = "TheValue";
+ String key2 = "24";
+ String value2 = "TheValueAgain";
+
+ // column, collection-element and key/value separators (^A, ^B, ^C)
+ byte columnSeparator [] = { 1 };
+ byte elementSeparator [] = { 2 };
+ byte kvSeparator [] = { 3 };
+
+
+ trans.write(foo.getBytes(), 0, foo.getBytes().length);
+ trans.write(columnSeparator, 0, 1);
+
+ // two consecutive separators produce an empty column
+ trans.write(columnSeparator, 0, 1);
+
+ trans.write(bar.getBytes(), 0, bar.getBytes().length);
+ trans.write(columnSeparator, 0, 1);
+
+ trans.write(key.getBytes(), 0, key.getBytes().length);
+ trans.write(kvSeparator, 0, 1);
+ trans.write(value.getBytes(), 0, value.getBytes().length);
+ trans.write(elementSeparator, 0, 1);
+
+ trans.write(key2.getBytes(), 0, key2.getBytes().length);
+ trans.write(kvSeparator, 0, 1);
+ trans.write(value2.getBytes(), 0, value2.getBytes().length);
+
+
+ trans.flush();
+
+
+ // use 3 as the row buffer size to force lots of re-buffering.
+ TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 3);
+
+ prot.readStructBegin();
+
+ prot.readFieldBegin();
+ String hello = prot.readString();
+ prot.readFieldEnd();
+
+ assertTrue(hello.equals(foo));
+
+ // the empty column written above should read back as ""
+ prot.readFieldBegin();
+ assertTrue(prot.readString().equals(""));
+ prot.readFieldEnd();
+
+ prot.readFieldBegin();
+ assertTrue(prot.readString().equals(bar));
+ prot.readFieldEnd();
+
+ prot.readFieldBegin();
+ TMap mapHeader = prot.readMapBegin();
+ assertTrue(mapHeader.size == 2);
+
+ assertTrue(prot.readI32() == 22);
+ assertTrue(prot.readString().equals(value));
+ assertTrue(prot.readI32() == 24);
+ assertTrue(prot.readString().equals(value2));
+ prot.readMapEnd();
+ prot.readFieldEnd();
+
+ // reading past the last column should yield the empty string
+ prot.readFieldBegin();
+ hello = prot.readString();
+ prot.readFieldEnd();
+ assertTrue(hello.length() == 0);
+
+ prot.readStructEnd();
+
+ } catch(Exception e) {
+ // NOTE(review): the exception is swallowed, so this test passes even
+ // when an assertion or read fails — consider rethrowing.
+ e.printStackTrace();
+ }
+ }
+
+
+ // Serializes a struct with i32, list<double>, string, map and list<string>
+ // fields, checks the exact byte layout, then deserializes it back.
+ public void testWrites() throws Exception {
+ try {
+ TMemoryBuffer trans = new TMemoryBuffer(1024);
+ TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 3);
+
+ prot.writeStructBegin(new TStruct());
+ prot.writeFieldBegin(new TField());
+ prot.writeI32(100);
+ prot.writeFieldEnd();
+
+ prot.writeFieldBegin(new TField());
+ prot.writeListBegin(new TList());
+ prot.writeDouble(348.55);
+ prot.writeDouble(234.22);
+ prot.writeListEnd();
+ prot.writeFieldEnd();
+
+ prot.writeFieldBegin(new TField());
+ prot.writeString("hello world!");
+ prot.writeFieldEnd();
+
+ prot.writeFieldBegin(new TField());
+ prot.writeMapBegin(new TMap());
+ prot.writeString("key1");
+ prot.writeString("val1");
+ prot.writeString("key2");
+ prot.writeString("val2");
+ prot.writeString("key3");
+ prot.writeString("val3");
+ prot.writeMapEnd();
+ prot.writeFieldEnd();
+
+ prot.writeFieldBegin(new TField());
+ prot.writeListBegin(new TList());
+ prot.writeString("elem1");
+ prot.writeString("elem2");
+ prot.writeListEnd();
+ prot.writeFieldEnd();
+
+
+ prot.writeFieldBegin(new TField());
+ prot.writeString("bye!");
+ prot.writeFieldEnd();
+
+ prot.writeStructEnd();
+ trans.flush();
+ byte b[] = new byte[3*1024];
+ int len = trans.read(b,0,b.length);
+ String test = new String(b, 0, len);
+
+ // NOTE(review): the separators in testRef are presumably raw control
+ // characters that do not render in this archived text — verify against
+ // the committed file.
+ String testRef = "100348.55234.22hello world!key1val1key2val2key3val3elem1elem2bye!";
+ // System.err.println("test=" + test + ">");
+ // System.err.println(" ref=" + testRef + ">");
+ assertTrue(test.equals(testRef));
+
+ trans = new TMemoryBuffer(1023);
+ trans.write(b, 0, len);
+
+ //
+ // read back!
+ //
+
+ prot = new TCTLSeparatedProtocol(trans, 10);
+
+ // 100 is the start
+ prot.readStructBegin();
+ prot.readFieldBegin();
+ assertTrue(prot.readI32() == 100);
+ prot.readFieldEnd();
+
+ // let's see if doubles work ok
+ prot.readFieldBegin();
+ TList l = prot.readListBegin();
+ assertTrue(l.size == 2);
+ assertTrue(prot.readDouble() == 348.55);
+ assertTrue(prot.readDouble() == 234.22);
+ prot.readListEnd();
+ prot.readFieldEnd();
+
+ // nice message
+ prot.readFieldBegin();
+ assertTrue(prot.readString().equals("hello world!"));
+ prot.readFieldEnd();
+
+ // 3 element map
+ prot.readFieldBegin();
+ TMap m = prot.readMapBegin();
+ assertTrue(m.size == 3);
+ assertTrue(prot.readString().equals("key1"));
+ assertTrue(prot.readString().equals("val1"));
+ assertTrue(prot.readString().equals("key2"));
+ assertTrue(prot.readString().equals("val2"));
+ assertTrue(prot.readString().equals("key3"));
+ assertTrue(prot.readString().equals("val3"));
+ prot.readMapEnd();
+ prot.readFieldEnd();
+
+ // the 2 element list
+ prot.readFieldBegin();
+ l = prot.readListBegin();
+ assertTrue(l.size == 2);
+ assertTrue(prot.readString().equals("elem1"));
+ assertTrue(prot.readString().equals("elem2"));
+ prot.readListEnd();
+ prot.readFieldEnd();
+
+ // final string
+ prot.readFieldBegin();
+ assertTrue(prot.readString().equals("bye!"));
+ prot.readFieldEnd();
+
+ // should return nulls at end
+ prot.readFieldBegin();
+ assertTrue(prot.readString().equals(""));
+ prot.readFieldEnd();
+
+ // should return nulls at end
+ prot.readFieldBegin();
+ assertTrue(prot.readString().equals(""));
+ prot.readFieldEnd();
+
+ prot.readStructEnd();
+
+
+ } catch(Exception e) {
+ // NOTE(review): the exception is swallowed, so this test passes even
+ // when an assertion or read fails — consider rethrowing.
+ e.printStackTrace();
+ }
+ }
+
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java?rev=706704&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java Tue Oct 21 11:11:05 2008
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.dynamic_type;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol;
+import org.apache.hadoop.hive.serde.Constants;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.io.BytesWritable;
+
+// Tests for DynamicSerDe: builds a struct value (i32, list<string>,
+// map<string,i32>), serializes and deserializes it through several thrift
+// protocols, and asserts the round trip preserves the value.
+public class TestDynamicSerDe extends TestCase {
+
+ // Round-trips the sample struct through each listed protocol and asserts
+ // deserialize(serialize(x)) equals x.
+ public void testDynamicSerDe() throws Throwable {
+ try {
+
+ // Try to construct an object
+ ArrayList<String> bye = new ArrayList<String>();
+ bye.add("firstString");
+ bye.add("secondString");
+ HashMap<String, Integer> another = new HashMap<String, Integer>();
+ another.put("firstKey", 1);
+ another.put("secondKey", 2);
+ ArrayList<Object> struct = new ArrayList<Object>();
+ struct.add(Integer.valueOf(234));
+ struct.add(bye);
+ struct.add(another);
+
+ // All protocols, paired with whether their output is binary (and thus
+ // not printable as text below)
+ ArrayList<String> protocols = new ArrayList<String>();
+ ArrayList<Boolean> isBinaries = new ArrayList<Boolean>();
+
+ protocols.add(com.facebook.thrift.protocol.TBinaryProtocol.class.getName());
+ isBinaries.add(true);
+
+ protocols.add(com.facebook.thrift.protocol.TJSONProtocol.class.getName());
+ isBinaries.add(false);
+
+ // TSimpleJSONProtocol does not support deserialization.
+ // protocols.add(com.facebook.thrift.protocol.TSimpleJSONProtocol.class.getName());
+ // isBinaries.add(false);
+
+ // TCTLSeparatedProtocol is not done yet.
+ // NOTE(review): despite the comment above, the protocol is enabled here.
+ protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+ isBinaries.add(false);
+
+ System.out.println("input struct = " + struct);
+
+ for(int pp = 0; pp<protocols.size(); pp++) {
+
+ String protocol = protocols.get(pp);
+ boolean isBinary = isBinaries.get(pp);
+
+ System.out.println("Testing protocol: " + protocol);
+ // table properties that would normally come from the metastore
+ Properties schema = new Properties();
+ schema.setProperty(Constants.SERIALIZATION_FORMAT, protocol);
+ schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+ schema.setProperty(Constants.SERIALIZATION_DDL,
+ "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+ schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+
+ DynamicSerDe serde = new DynamicSerDe();
+ serde.initialize(new Configuration(), schema);
+
+ // Try getObjectInspector
+ ObjectInspector oi = serde.getObjectInspector();
+ System.out.println("TypeName = " + oi.getTypeName());
+
+
+ // Try to serialize
+ BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+ // hex-dump the serialized bytes for debugging
+ StringBuilder sb = new StringBuilder();
+ for (int i=0; i<bytes.getSize(); i++) {
+ byte b = bytes.get()[i];
+ int v = (b<0 ? 256 + b : b);
+ sb.append(String.format("x%02x", v));
+ }
+ System.out.println("bytes =" + sb);
+
+ if (!isBinary) {
+ System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()));
+ }
+
+ // Try to deserialize
+ Object o = serde.deserialize(bytes);
+ System.out.println("o class = " + o.getClass());
+ List<?> olist = (List<?>)o;
+ System.out.println("o size = " + olist.size());
+ System.out.println("o[0] class = " + olist.get(0).getClass());
+ System.out.println("o[1] class = " + olist.get(1).getClass());
+ System.out.println("o[2] class = " + olist.get(2).getClass());
+ System.out.println("o = " + o);
+
+ assertEquals(o, struct);
+ }
+
+ } catch (Throwable e) {
+ // print for context, then rethrow so junit records the failure
+ e.printStackTrace();
+ throw e;
+ }
+
+ }
+
+
+
+
+ // Verifies that FIELD/COLLECTION/LINE/MAPKEY delimiters supplied via table
+ // properties are honored by TCTLSeparatedProtocol, then round-trips.
+ public void testConfigurableTCTLSeparated() throws Throwable {
+ try {
+
+
+ // Try to construct an object
+ ArrayList<String> bye = new ArrayList<String>();
+ bye.add("firstString");
+ bye.add("secondString");
+ HashMap<String, Integer> another = new HashMap<String, Integer>();
+ another.put("firstKey", 1);
+ another.put("secondKey", 2);
+ ArrayList<Object> struct = new ArrayList<Object>();
+ struct.add(Integer.valueOf(234));
+ struct.add(bye);
+ struct.add(another);
+
+ Properties schema = new Properties();
+ schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+ schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+ schema.setProperty(Constants.SERIALIZATION_DDL,
+ "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+ schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+
+ // custom separators, given as decimal character codes: tab (9) between
+ // fields, \u0001 between elements, \u0004 between map keys and values
+ schema.setProperty(Constants.FIELD_DELIM, "9");
+ schema.setProperty(Constants.COLLECTION_DELIM, "1");
+ schema.setProperty(Constants.LINE_DELIM, "2");
+ schema.setProperty(Constants.MAPKEY_DELIM, "4");
+
+ DynamicSerDe serde = new DynamicSerDe();
+ serde.initialize(new Configuration(), schema);
+
+ // the output protocol should have picked up the tab (9) field delimiter
+ TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol)serde.oprot_;
+ assertTrue(prot.getPrimarySeparator() == 9);
+
+ ObjectInspector oi = serde.getObjectInspector();
+
+ // Try to serialize
+ BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+
+ StringBuilder sb = new StringBuilder();
+ for (int i=0; i<bytes.getSize(); i++) {
+ byte b = bytes.get()[i];
+ int v = (b<0 ? 256 + b : b);
+ sb.append(String.format("x%02x", v));
+ }
+ System.out.println("bytes =" + sb);
+
+ // expected wire form with the custom delimiters configured above.
+ // NOTE(review): assumes HashMap iterates firstKey before secondKey —
+ // iteration order is not guaranteed, so this comparison is fragile.
+ String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
+
+ System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()) + ">");
+ System.out.println("compare to =" + compare + ">");
+
+ assertTrue(compare.equals( new String(bytes.get(), 0, bytes.getSize())));
+
+ // Try to deserialize
+ Object o = serde.deserialize(bytes);
+ System.out.println("o class = " + o.getClass());
+ List<?> olist = (List<?>)o;
+ System.out.println("o size = " + olist.size());
+ System.out.println("o[0] class = " + olist.get(0).getClass());
+ System.out.println("o[1] class = " + olist.get(1).getClass());
+ System.out.println("o[2] class = " + olist.get(2).getClass());
+ System.out.println("o = " + o);
+
+ assertEquals(o, struct);
+
+ } catch (Throwable e) {
+ // print for context, then rethrow so junit records the failure
+ e.printStackTrace();
+ throw e;
+ }
+
+ }
+}
Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java?rev=706704&r1=706703&r2=706704&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java Tue Oct 21 11:11:05 2008
@@ -22,8 +22,8 @@
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.thrift_test.Complex;
-import org.apache.hadoop.hive.serde2.thrift_test.IntString;
+import org.apache.hadoop.hive.serde2.thrift.test.Complex;
+import org.apache.hadoop.hive.serde2.thrift.test.IntString;
import junit.framework.TestCase;
@@ -50,14 +50,14 @@
// real object
Complex cc = new Complex();
cc.aint = 1;
- cc.astring = "test";
+ cc.aString = "test";
List<Integer> c2 = Arrays.asList(new Integer[]{1,2,3});
cc.lint = c2;
List<String> c3 = Arrays.asList(new String[]{"one", "two"});
- cc.lstring = c3;
+ cc.lString = c3;
List<IntString> c4 = new ArrayList<IntString>();
- cc.lintstring = c4;
- cc.mstringstring = null;
+ cc.lintString = c4;
+ cc.mStringString = null;
// standard object
Object c = ObjectInspectorUtils.getStandardObject(cc, oi1);
Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java?rev=706704&r1=706703&r2=706704&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java Tue Oct 21 11:11:05 2008
@@ -22,8 +22,8 @@
import java.util.List;
import junit.framework.TestCase;
-import org.apache.hadoop.hive.serde2.thrift_test.Complex;
-import org.apache.hadoop.hive.serde2.thrift_test.IntString;
+import org.apache.hadoop.hive.serde2.thrift.test.Complex;
+import org.apache.hadoop.hive.serde2.thrift.test.IntString;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -53,14 +53,14 @@
// real object
Complex c = new Complex();
c.aint = 1;
- c.astring = "test";
+ c.aString = "test";
List<Integer> c2 = Arrays.asList(new Integer[]{1,2,3});
c.lint = c2;
List<String> c3 = Arrays.asList(new String[]{"one", "two"});
- c.lstring = c3;
+ c.lString = c3;
List<IntString> c4 = new ArrayList<IntString>();
- c.lintstring = c4;
- c.mstringstring = null;
+ c.lintString = c4;
+ c.mStringString = null;
assertEquals(1, soi.getStructFieldData(c, fields.get(0)));
assertEquals("test", soi.getStructFieldData(c, fields.get(1)));
Modified: hadoop/core/trunk/src/contrib/hive/testutils/run_tests
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/testutils/run_tests?rev=706704&r1=706703&r2=706704&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/testutils/run_tests (original)
+++ hadoop/core/trunk/src/contrib/hive/testutils/run_tests Tue Oct 21 11:11:05 2008
@@ -2,9 +2,11 @@
d=`mktemp -d /tmp/hivetest_XXXX`
for i in `find . -name Test\*\.* | grep -v svn | egrep "java$|vm$" | sed 's/.*\/Test/Test/g' | sed 's/\.java//g' | sed 's/\.vm//g' | sort`;
do
- cmd="ant -lib ../../../lib -Dtestcase=$i clean-test test -logfile $d/$i.log"
- echo $cmd;
- $cmd;
+ if [ "$i" != "TestSerDe" ]; then
+ cmd="ant -lib ../../../lib -Dtestcase=$i clean-test test -logfile $d/$i.log"
+ echo $cmd;
+ $cmd;
+ fi
done
cat $d/*.log | grep junit | egrep "Running org|Tests run"
echo Logs at $d