You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2008/09/18 02:09:19 UTC
svn commit: r696525 [1/2] - in /hadoop/core/trunk: ./
src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/
src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/
src/contrib/hive/serde/src/test/org/apache/hadoop/hive/s...
Author: dhruba
Date: Wed Sep 17 17:09:17 2008
New Revision: 696525
URL: http://svn.apache.org/viewvc?rev=696525&view=rev
Log:
HADOOP-4138. Refactor the Hive SerDe library to better structure
the interfaces to the serializer and de-serializer.
(Zheng Shao via dhruba)
Added:
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStreamTypedSerDe.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ColumnSet.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeException.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TReflectionUtils.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftByteStreamTypedSerDe.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftDeserializer.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/InspectableObject.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MapObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MetadataListStructObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardListObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardMapObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardPrimitiveObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructField.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ThriftStructObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionStructObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/MyStruct.java
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestReflectionObjectInspectors.java
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestUnionStructObjectInspector.java
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/Complex.java
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/Constants.java
hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/IntString.java
Modified:
hadoop/core/trunk/CHANGES.txt
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=696525&r1=696524&r2=696525&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Wed Sep 17 17:09:17 2008
@@ -591,6 +591,10 @@
HADOOP-4197. Update DATA_TRANSFER_VERSION for HADOOP-3981. (szetszwo)
+ HADOOP-4138. Refactor the Hive SerDe library to better structure
+ the interfaces to the serializer and de-serializer.
+ (Zheng Shao via dhruba)
+
Release 0.18.1 - 2008-09-17
IMPROVEMENTS
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.io.*;
+
+/**
+ * Thin extensions of {@link ByteArrayInputStream} and {@link ByteArrayOutputStream}
+ * that expose the underlying buffer and byte count without copying.
+ */
+public class ByteStream {
+
+  /** Input stream whose backing array can be swapped so one instance can be reused. */
+  public static class Input extends ByteArrayInputStream {
+
+    /** Creates a stream over a one-byte placeholder buffer. */
+    public Input() {
+      super(new byte[1]);
+    }
+
+    public Input(byte[] buf) {
+      super(buf);
+    }
+
+    public Input(byte[] buf, int offset, int length) {
+      super(buf, offset, length);
+    }
+
+    /** Returns the backing array itself, not a copy. */
+    public byte[] getData() {
+      return buf;
+    }
+
+    /** Returns the inherited {@code count} field. */
+    public int getCount() {
+      return count;
+    }
+
+    /**
+     * Re-points this stream at {@code argBuf} and rewinds both the read
+     * position and the mark to 0; {@code argCount} becomes the new count.
+     */
+    public void reset(byte[] argBuf, int argCount) {
+      buf = argBuf;
+      count = argCount;
+      pos = 0;
+      mark = 0;
+    }
+  }
+
+  /** Output stream that exposes its internal buffer and the number of bytes written. */
+  public static class Output extends ByteArrayOutputStream {
+
+    public Output() {
+      super();
+    }
+
+    public Output(int size) {
+      super(size);
+    }
+
+    /** Returns the internal buffer itself; only the first {@link #getCount()} bytes are valid. */
+    public byte[] getData() {
+      return buf;
+    }
+
+    /** Returns the inherited {@code count} field, i.e. the number of valid bytes. */
+    public int getCount() {
+      return count;
+    }
+  }
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStreamTypedSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStreamTypedSerDe.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStreamTypedSerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStreamTypedSerDe.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.lang.reflect.Type;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Base class for typed SerDes that work on reusable byte streams.
+ * Subclasses read serialized input from {@link #bis} and may write to {@link #bos}.
+ */
+public abstract class ByteStreamTypedSerDe extends TypedSerDe {
+
+  /** Reusable input stream, re-pointed at each incoming record by deserialize(). */
+  protected ByteStream.Input bis;
+  /** Reusable output stream; it is only created here, never written to in this class. */
+  protected ByteStream.Output bos;
+
+  public ByteStreamTypedSerDe(Type objectType) throws SerDeException {
+    super(objectType);
+    bos = new ByteStream.Output();
+    bis = new ByteStream.Input();
+  }
+
+  /**
+   * Runs the superclass deserialize and then points {@link #bis} at the bytes of
+   * {@code field}, without copying them.
+   *
+   * @param field the serialized record; must be a BytesWritable
+   * @return whatever the superclass deserialize returned
+   * @throws SerDeException if {@code field} is null or not a BytesWritable
+   */
+  public Object deserialize(Writable field) throws SerDeException {
+    Object retObj = super.deserialize(field);
+    // Report a wrong input type through the declared exception rather than
+    // letting a ClassCastException / NullPointerException escape.
+    if (!(field instanceof BytesWritable)) {
+      throw new SerDeException(getClass().getName() + " expects BytesWritable, but got: "
+          + (field == null ? "null" : field.getClass().getName()));
+    }
+    BytesWritable b = (BytesWritable) field;
+    bis.reset(b.get(), b.getSize());
+    return retObj;
+  }
+
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ColumnSet.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ColumnSet.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ColumnSet.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ColumnSet.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.util.ArrayList;
+
+/**
+ * Mutable holder for the column values of a single row.
+ */
+public class ColumnSet {
+  /** Column values of the row; stays {@code null} until a list is assigned. */
+  public ArrayList<String> col;
+
+  /** Creates an empty holder; {@link #col} is left {@code null}. */
+  public ColumnSet() {
+  }
+
+  /**
+   * Creates a holder that uses the given list directly (no copy is made).
+   * @param col the column values
+   */
+  public ColumnSet(ArrayList<String> col) {
+    this();
+    this.col = col;
+  }
+
+  /**
+   * Returns the list's string form, or {@code "null"} when no list has been
+   * assigned yet, so that printing a fresh instance does not throw.
+   */
+  public String toString() {
+    return String.valueOf(col);
+  }
+
+}
+
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.conf.Configuration;
+import java.util.Properties;
+
+/**
+ * A Deserializer is used to deserialize the data from a Hadoop Writable to a
+ * custom Java object that can be of any type that the developer wants.
+ *
+ * A Deserializer also provides the ObjectInspector which can be used to inspect
+ * the internal structure of the object (that is returned by the deserialize function).
+ *
+ */
+public interface Deserializer {
+
+ /**
+ * Initialize the Deserializer.
+ * @param conf System properties
+ * @param tbl table properties
+ * @throws SerDeException
+ */
+ public void initialize(Configuration conf, Properties tbl) throws SerDeException;
+
+ /**
+ * Deserialize an object out of a Writable blob.
+ * In most cases, the return value of this function will be constant since the function
+ * will reuse the returned object.
+ * If the client wants to keep a copy of the object, the client needs to clone the
+ * returned value by calling ObjectInspectorUtils.getStandardObject().
+ * @param blob The Writable object containing a serialized object
+ * @return A Java object representing the contents in the blob.
+ */
+ public Object deserialize(Writable blob) throws SerDeException;
+
+ /**
+ * Get the object inspector that can be used to navigate through the internal
+ * structure of the Object returned from deserialize(...).
+ */
+ public ObjectInspector getObjectInspector() throws SerDeException;
+
+ public String getShortName();
+
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.CharacterCodingException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.MetadataListStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+
+public class MetadataTypedColumnsetSerDe implements SerDe {
+
+ public static final Log LOG = LogFactory.getLog(MetadataTypedColumnsetSerDe.class.getName());
+
+ public String getShortName() {
+ return shortName();
+ }
+
+
+ public static String shortName() {
+ return "simple_meta";
+ }
+
+ static {
+ StackTraceElement[] sTrace = new Exception().getStackTrace();
+ String className = sTrace[0].getClassName();
+ try {
+ SerDeUtils.registerSerDe(shortName(), Class.forName(className));
+ // For backward compatibility: this class replaces the following class.
+ SerDeUtils.registerSerDe("org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe",
+ Class.forName(className));
+ } catch(Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ final public static String DefaultSeparator = "\001";
+
+ private String separator;
+ // constant for now, will make it configurable later.
+ private String nullString = "\\N";
+ private List<String> columnNames;
+ private ObjectInspector cachedObjectInspector;
+
+ public String toString() {
+ return "MetaDataTypedColumnsetSerDe[" + separator + "," + columnNames + "]";
+ }
+
+ public MetadataTypedColumnsetSerDe() throws SerDeException {
+ separator = DefaultSeparator;
+ }
+
+  /**
+   * Configures the separator and the column names from the table properties.
+   *
+   * The separator is read from Constants.SERIALIZATION_FORMAT: a value that parses as a
+   * byte is taken as the code of a single separator character, anything else is used
+   * verbatim; when the property is missing or empty, DefaultSeparator is used.
+   * The "columns" property is a comma-separated list of column names; when it is missing
+   * or empty, the row is exposed through the reflection inspector of ColumnSet.
+   *
+   * @param job unused by this implementation
+   * @param tbl table properties
+   * @throws SerDeException declared by the interface
+   */
+  public void initialize(Configuration job, Properties tbl) throws SerDeException {
+    separator = DefaultSeparator;
+    String alt_sep = tbl.getProperty(Constants.SERIALIZATION_FORMAT);
+    if (alt_sep != null && alt_sep.length() > 0) {
+      try {
+        // Map the byte code straight to a char instead of decoding it with the
+        // platform default charset, so the separator is the same on every JVM.
+        byte code = Byte.parseByte(alt_sep);
+        separator = String.valueOf((char) (code & 0xFF));
+      } catch (NumberFormatException e) {
+        // Not a byte code: use the configured text itself as the separator.
+        separator = alt_sep;
+      }
+    }
+    String columnProperty = tbl.getProperty("columns");
+    if (columnProperty == null || columnProperty.length() == 0) {
+      // Hack for tables with no columns
+      // Treat it as a table with a single column called "col"
+      // Drop names left over from an earlier initialize() call.
+      columnNames = null;
+      cachedObjectInspector = ObjectInspectorFactory.getReflectionObjectInspector(
+          ColumnSet.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    } else {
+      columnNames = Arrays.asList(columnProperty.split(","));
+      cachedObjectInspector = MetadataListStructObjectInspector.getInstance(columnNames);
+    }
+    // Report through the class logger rather than writing to stdout from library code.
+    LOG.info(getClass().getName() + ": initialized with columnNames: " + columnNames);
+  }
+
+  /**
+   * Splits {@code row} on {@code sep} and stores the pieces in {@code c.col}, reusing
+   * the existing list when there is one. Trailing empty fields are kept, and a field
+   * equal to {@code nullString} is stored as {@code null}.
+   *
+   * @param c holder to fill; its list is cleared first, or created if absent
+   * @param row the text of one row
+   * @param sep the separator, matched literally (the same way serialize() writes it)
+   * @param nullString the text that represents a null column
+   * @return {@code c}
+   */
+  public static Object deserialize(ColumnSet c, String row, String sep, String nullString) throws Exception {
+    if (c.col == null) {
+      c.col = new ArrayList<String>();
+    } else {
+      c.col.clear();
+    }
+    // String.split takes a regular expression; quote the separator so characters such
+    // as '|' or '.' are treated as plain text.
+    String[] pieces = row.split(java.util.regex.Pattern.quote(sep), -1);
+
+    for (String piece : pieces) {
+      if (piece.equals(nullString)) {
+        c.col.add(null);
+      } else {
+        c.col.add(piece);
+      }
+    }
+    return (c);
+  }
+
+  // Reused for every row; callers that keep a row must copy it.
+  ColumnSet deserializeCache = new ColumnSet();
+
+  /**
+   * Decodes one row from a BytesWritable or Text and splits it into columns.
+   * The same ColumnSet instance is returned on every call.
+   *
+   * @param field the serialized row
+   * @return the shared ColumnSet holding the columns of this row
+   * @throws SerDeException if {@code field} is neither Text nor BytesWritable, if the
+   *         bytes cannot be decoded, or if splitting the row fails
+   */
+  public Object deserialize(Writable field) throws SerDeException {
+    String row;
+    if (field instanceof BytesWritable) {
+      BytesWritable b = (BytesWritable) field;
+      try {
+        row = Text.decode(b.get(), 0, b.getSize());
+      } catch (CharacterCodingException e) {
+        throw new SerDeException(e);
+      }
+    } else if (field instanceof Text) {
+      row = field.toString();
+    } else {
+      // Fail with a clear message instead of a NullPointerException further down.
+      throw new SerDeException(this.getClass().getName() + " expects Text or BytesWritable");
+    }
+    try {
+      deserialize(deserializeCache, row, separator, nullString);
+      if (columnNames != null) {
+        assert (columnNames.size() == deserializeCache.col.size());
+      }
+      return deserializeCache;
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return cachedObjectInspector;
+ }
+
+ public Class<? extends Writable> getSerializedClass() {
+ return Text.class;
+ }
+
+ Text serializeCache = new Text();
+ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+
+ if (objInspector.getCategory() != Category.STRUCT) {
+ throw new SerDeException(getClass().toString()
+ + " can only serialize struct types, but we got: " + objInspector.getTypeName());
+ }
+ StructObjectInspector soi = (StructObjectInspector) objInspector;
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+
+ StringBuilder sb = new StringBuilder();
+ for(int i=0; i<fields.size(); i++) {
+ if (i>0) sb.append(separator);
+ Object column = soi.getStructFieldData(obj, fields.get(i));
+ if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
+ // For primitive object, serialize to plain string
+ sb.append(column == null ? nullString : column.toString());
+ } else {
+ // For complex object, serialize to JSON format
+ sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector()));
+ }
+ }
+ serializeCache.set(sb.toString());
+ return serializeCache;
+ }
+
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+/**
+ * A union of the Deserializer and Serializer interfaces.
+ *
+ * If a developer wants a Hive table to be read-only, it is enough to
+ * implement Deserializer alone.
+ *
+ * If the table should be both readable and writable, implement SerDe,
+ * which combines Deserializer and Serializer.
+ *
+ */
+public interface SerDe extends Deserializer, Serializer {
+
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeException.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeException.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeException.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeException.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+/**
+ * Generic exception class for SerDes
+ *
+ */
+
+public class SerDeException extends Exception {
+ private static final long serialVersionUID = 1L;
+
+ public SerDeException() {
+ super();
+ }
+
+ public SerDeException(String message) {
+ super(message);
+ }
+
+ public SerDeException(Throwable cause) {
+ super(cause);
+ }
+
+ public SerDeException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
+
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.util.*;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+public class SerDeUtils {
+
+
+ public static final char QUOTE = '"';
+ public static final char COLON = ':';
+ public static final char COMMA = ',';
+ public static final String LBRACKET = "[";
+ public static final String RBRACKET = "]";
+ public static final String LBRACE = "{";
+ public static final String RBRACE = "}";
+
+ private static HashMap<String, Class<?>> serdes = new HashMap<String, Class<?>> ();
+
+ public static void registerSerDe(String name, Class<?> serde) {
+ if(serdes.containsKey(name)) {
+ throw new RuntimeException("double registering serde " + name);
+ }
+ serdes.put(name, serde);
+ }
+
+  /**
+   * Creates a new Deserializer for the given name.
+   * The name is first looked up among the registered SerDes; if it is not registered,
+   * it is treated as a class name and loaded with Class.forName.
+   *
+   * @param name a registered SerDe name or a class name
+   * @return a new instance created through the class's no-argument constructor
+   * @throws SerDeException if the class cannot be found, cannot be instantiated, or
+   *         is not a Deserializer
+   */
+  public static Deserializer lookupDeserializer(String name) throws SerDeException {
+    Class<?> c;
+    if (serdes.containsKey(name)) {
+      c = serdes.get(name);
+    } else {
+      try {
+        c = Class.forName(name);
+      } catch (ClassNotFoundException e) {
+        // Keep the original exception as the cause so the failure stays diagnosable.
+        throw new SerDeException("SerDe " + name + " does not exist", e);
+      }
+    }
+    try {
+      return (Deserializer) c.newInstance();
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+
+ private static boolean initCoreSerDes = registerCoreSerDes();
+
+ protected static boolean registerCoreSerDes() {
+ // Eagerly load SerDes so they will register their symbolic names even on Lazy Loading JVMs
+ try {
+ // loading these classes will automatically register the short names
+ Class.forName(org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe.class.getName());
+ Class.forName(org.apache.hadoop.hive.serde2.ThriftDeserializer.class.getName());
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException("IMPOSSIBLE Exception: Unable to initialize core serdes", e);
+ }
+ return true;
+ }
+
+  /**
+   * Escapes a string for use inside a double-quoted JSON string.
+   * The double quote and backslash are prefixed with a backslash; backspace, form feed,
+   * newline, carriage return and tab become their two-character escapes; any other
+   * character below U+0020 becomes a four-digit {@code \}{@code uXXXX} escape.
+   *
+   * @param str the text to escape; must not be null
+   * @return the escaped text
+   */
+  public static String escapeString(String str) {
+    final int n = str.length();
+    final StringBuilder out = new StringBuilder(n + 16);
+
+    for (int idx = 0; idx < n; idx++) {
+      final char ch = str.charAt(idx);
+      if (ch == '"' || ch == '\\') {
+        out.append('\\').append(ch);
+      } else if (ch == '\b') {
+        out.append('\\').append('b');
+      } else if (ch == '\f') {
+        out.append('\\').append('f');
+      } else if (ch == '\n') {
+        out.append('\\').append('n');
+      } else if (ch == '\r') {
+        out.append('\\').append('r');
+      } else if (ch == '\t') {
+        out.append('\\').append('t');
+      } else if (ch < ' ') {
+        // Control characters: the JSON RFC requires everything below U+0020 to be escaped.
+        final String hex = Integer.toHexString(ch);
+        out.append('\\').append('u');
+        // Left-pad the hex digits with zeros to a width of four.
+        for (int pad = hex.length(); pad < 4; pad++) {
+          out.append('0');
+        }
+        out.append(hex);
+      } else {
+        out.append(ch);
+      }
+    }
+    return out.toString();
+  }
+
+
+  /**
+   * Replaces newline, carriage return and tab with their two-character backslash
+   * escapes and leaves every other character untouched.
+   *
+   * @param str the text to escape; must not be null
+   * @return the escaped text
+   */
+  public static String lightEscapeString(String str) {
+    final int n = str.length();
+    final StringBuilder out = new StringBuilder(n + 16);
+
+    for (int idx = 0; idx < n; idx++) {
+      final char ch = str.charAt(idx);
+      if (ch == '\n') {
+        out.append('\\').append('n');
+      } else if (ch == '\r') {
+        out.append('\\').append('r');
+      } else if (ch == '\t') {
+        out.append('\\').append('t');
+      } else {
+        out.append(ch);
+      }
+    }
+    return out.toString();
+  }
+
+ public static String getJSONString(Object o, ObjectInspector oi) {
+ StringBuilder sb = new StringBuilder();
+ buildJSONString(sb, o, oi);
+ return sb.toString();
+ }
+
+
+  /**
+   * Appends a JSON-style rendering of {@code o}, as described by {@code oi}, to {@code sb}.
+   * Strings are quoted and escaped, booleans are written as True/False, other primitives
+   * use toString(). Lists, maps and structs are rendered recursively. A null primitive,
+   * and a list or map that the inspector reports as null, is written as the \N marker.
+   *
+   * @param sb the buffer to append to
+   * @param o the object to render; may be null
+   * @param oi the inspector describing {@code o}
+   * @throws RuntimeException if the inspector reports an unknown category
+   */
+  static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) {
+
+    switch (oi.getCategory()) {
+      case PRIMITIVE: {
+        if (o == null) {
+          sb.append("\\N");
+        } else if (o instanceof String) {
+          sb.append(QUOTE);
+          sb.append(escapeString((String) o));
+          sb.append(QUOTE);
+        } else if (o instanceof Boolean) {
+          sb.append(((Boolean) o).booleanValue() ? "True" : "False");
+        } else {
+          // it's a number - so doesn't need to be escaped.
+          sb.append(o.toString());
+        }
+        break;
+      }
+      case LIST: {
+        ListObjectInspector loi = (ListObjectInspector) oi;
+        ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
+        List<?> olist = loi.getList(o);
+        if (olist == null) {
+          // Same null marker as the primitive case, instead of a NullPointerException.
+          sb.append("\\N");
+          break;
+        }
+        sb.append(LBRACKET);
+        for (int i = 0; i < olist.size(); i++) {
+          if (i > 0) {
+            sb.append(COMMA);
+          }
+          buildJSONString(sb, olist.get(i), listElementObjectInspector);
+        }
+        sb.append(RBRACKET);
+        break;
+      }
+      case MAP: {
+        MapObjectInspector moi = (MapObjectInspector) oi;
+        ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
+        ObjectInspector mapValueObjectInspector = moi.getMapValueObjectInspector();
+        Map<?, ?> omap = moi.getMap(o);
+        if (omap == null) {
+          // Same null marker as the primitive case, instead of a NullPointerException.
+          sb.append("\\N");
+          break;
+        }
+        sb.append(LBRACE);
+        boolean first = true;
+        for (Map.Entry<?, ?> e : omap.entrySet()) {
+          if (first) {
+            first = false;
+          } else {
+            sb.append(COMMA);
+          }
+          buildJSONString(sb, e.getKey(), mapKeyObjectInspector);
+          sb.append(COLON);
+          buildJSONString(sb, e.getValue(), mapValueObjectInspector);
+        }
+        sb.append(RBRACE);
+        break;
+      }
+      case STRUCT: {
+        sb.append(LBRACE);
+        StructObjectInspector soi = (StructObjectInspector) oi;
+        List<? extends StructField> structFields = soi.getAllStructFieldRefs();
+        for (int i = 0; i < structFields.size(); i++) {
+          if (i > 0) {
+            sb.append(COMMA);
+          }
+          StructField field = structFields.get(i);
+          sb.append(QUOTE);
+          // Escape the name so a quote or backslash in it cannot break the quoting.
+          sb.append(escapeString(field.getFieldName()));
+          sb.append(QUOTE);
+          sb.append(COLON);
+          buildJSONString(sb, soi.getStructFieldData(o, field),
+              field.getFieldObjectInspector());
+        }
+        sb.append(RBRACE);
+        break;
+      }
+      default:
+        throw new RuntimeException("Unknown type in ObjectInspector!");
+    }
+
+  }
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.conf.Configuration;
+import java.util.Properties;
+
+/**
+ * HiveSerializer is used to serialize data to a Hadoop Writable object.
+ * The serialize
+ * In addition to the interface below, all implementations are assume to have a ctor
+ * that takes a single 'Table' object as argument.
+ *
+ */
+public interface Serializer {
+
+ /**
+ * Initialize the HiveSerializer.
+ * @param conf System properties
+ * @param tbl table properties
+ * @throws SerDeException
+ */
+ public void initialize(Configuration conf, Properties tbl) throws SerDeException;
+
+ /**
+ * Returns the Writable class that would be returned by the serialize method.
+ * This is used to initialize SequenceFile header.
+ */
+ public Class<? extends Writable> getSerializedClass();
+ /**
+ * Serialize an object by navigating inside the Object with the ObjectInspector.
+ * In most cases, the return value of this function will be constant since the function
+ * will reuse the Writable object.
+ * If the client wants to keep a copy of the Writable, the client needs to clone the
+ * returned value.
+ */
+ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException;
+
+ public String getShortName();
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TReflectionUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TReflectionUtils.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TReflectionUtils.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TReflectionUtils.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import com.facebook.thrift.protocol.TProtocolFactory;
+
+
+ public class TReflectionUtils {
+   public static final String thriftReaderFname = "read";
+   public static final String thriftWriterFname = "write";
+
+   // Holds exactly one element: the TProtocol class, loaded by name.
+   public static final Class<?>[] thriftRWParams;
+   static {
+     Class<?> protocolClass;
+     try {
+       protocolClass = Class.forName("com.facebook.thrift.protocol.TProtocol");
+     } catch (ClassNotFoundException notFound) {
+       throw new RuntimeException(notFound);
+     }
+     thriftRWParams = new Class<?>[] { protocolClass };
+   }
+
+   /** Loads the class named protocolName + "$Factory" and returns a new instance of it. */
+   public static TProtocolFactory getProtocolFactoryByName(String protocolName) throws Exception {
+     return (TProtocolFactory) Class.forName(protocolName + "$Factory").newInstance();
+   }
+ }
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftByteStreamTypedSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftByteStreamTypedSerDe.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftByteStreamTypedSerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftByteStreamTypedSerDe.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.lang.reflect.Type;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+
+import com.facebook.thrift.TBase;
+import com.facebook.thrift.protocol.TProtocol;
+import com.facebook.thrift.protocol.TProtocolFactory;
+import com.facebook.thrift.transport.TIOStreamTransport;
+
+ public class ThriftByteStreamTypedSerDe extends ByteStreamTypedSerDe {
+   protected TIOStreamTransport outTransport, inTransport;
+   protected TProtocol outProtocol, inProtocol;
+
+   /** Creates the transports and protocols right after the superclass is constructed. */
+   public ThriftByteStreamTypedSerDe(Type objectType, TProtocolFactory inFactory,
+       TProtocolFactory outFactory) throws SerDeException {
+     super(objectType);
+     try {
+       init(inFactory, outFactory);
+     } catch (Exception cause) {
+       throw new SerDeException(cause);
+     }
+   }
+
+   private void init(TProtocolFactory inFactory, TProtocolFactory outFactory) throws Exception {
+     this.outTransport = new TIOStreamTransport(bos);
+     this.inTransport = new TIOStreamTransport(bis);
+     this.outProtocol = outFactory.getProtocol(this.outTransport);
+     this.inProtocol = inFactory.getProtocol(this.inTransport);
+   }
+
+   /** Always throws: this class cannot be set up from table properties. */
+   public void initialize(Configuration job, Properties tbl) throws SerDeException {
+     throw new SerDeException("ThriftByteStreamTypedSerDe is still semi-abstract");
+   }
+
+   public static String shortName() {
+     return "thriftbytestream";
+   }
+
+   public String getShortName() {
+     return shortName();
+   }
+
+   protected ObjectInspectorFactory.ObjectInspectorOptions getObjectInspectorOptions() {
+     return ObjectInspectorFactory.ObjectInspectorOptions.THRIFT;
+   }
+
+   /** Has the object returned by super.deserialize(field) read itself from inProtocol. */
+   public Object deserialize(Writable field) throws SerDeException {
+     final Object result = super.deserialize(field);
+     try {
+       ((TBase) result).read(inProtocol);
+     } catch (Exception cause) {
+       throw new SerDeException(cause);
+     }
+     return result;
+   }
+ }
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftDeserializer.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftDeserializer.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftDeserializer.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftDeserializer.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+import com.facebook.thrift.protocol.TProtocolFactory;
+
+ public class ThriftDeserializer implements Deserializer {
+   public static String shortName() {
+     return "thrift";
+   }
+
+   public String getShortName() {
+     return shortName();
+   }
+
+   static {
+     // Use the class literal; reading the class name from a stack trace can see omitted frames.
+     try {
+       SerDeUtils.registerSerDe(shortName(), ThriftDeserializer.class);
+       // For backward compatibility: this class replaces the following class.
+       SerDeUtils.registerSerDe("org.apache.hadoop.hive.serde.thrift.ThriftSerDe", ThriftDeserializer.class);
+     } catch(Exception e) {
+       throw new RuntimeException(e);
+     }
+   }
+
+   private ThriftByteStreamTypedSerDe tsd;
+
+   public ThriftDeserializer() { }
+
+   /**
+    * Builds the wrapped serde from table properties (records are assumed fixed per table).
+    * @throws SerDeException if the record class is not set or a class cannot be loaded
+    */
+   public void initialize(Configuration job, Properties tbl) throws SerDeException {
+     try {
+       String className = tbl.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_CLASS);
+       if (className == null) {
+         throw new SerDeException("Table property for the serialization class is not set");
+       }
+       Class<?> recordClass = Class.forName(className);
+       String protoName = tbl.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
+       if (protoName == null) {
+         // Fully qualified, because the factory is looked up with Class.forName.
+         protoName = "com.facebook.thrift.protocol.TBinaryProtocol";
+       }
+       TProtocolFactory tp = TReflectionUtils.getProtocolFactoryByName(protoName);
+       tsd = new ThriftByteStreamTypedSerDe(recordClass, tp, tp);
+     } catch (SerDeException e) {
+       throw e;
+     } catch (Exception e) {
+       throw new SerDeException(e);
+     }
+   }
+
+   public Object deserialize(Writable field) throws SerDeException {
+     return tsd.deserialize(field);
+   }
+
+   public ObjectInspector getObjectInspector() throws SerDeException {
+     return tsd.getObjectInspector();
+   }
+ }
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+
+ public abstract class TypedSerDe implements SerDe {
+   protected Type objectType;
+   protected Class<?> objectClass;
+   protected Object deserializeCache;
+
+   /** Stores objectType and derives objectClass from it (the raw type if parameterized). */
+   public TypedSerDe(Type objectType) throws SerDeException {
+     this.objectType = objectType;
+     if (objectType instanceof Class) {
+       this.objectClass = (Class<?>) objectType;
+       return;
+     }
+     if (!(objectType instanceof ParameterizedType)) {
+       throw new SerDeException("Cannot create TypedSerDe with type " + objectType);
+     }
+     this.objectClass = (Class<?>) ((ParameterizedType) objectType).getRawType();
+   }
+
+   /** Returns deserializeCache if set, else ReflectionUtils.newInstance(objectClass, null). */
+   public Object deserialize(Writable blob) throws SerDeException {
+     if (deserializeCache != null) {
+       assert(deserializeCache.getClass().equals(objectClass));
+       return deserializeCache;
+     }
+     return ReflectionUtils.newInstance(objectClass, null);
+   }
+
+   public ObjectInspector getObjectInspector() throws SerDeException {
+     return ObjectInspectorFactory.getReflectionObjectInspector(
+         objectType, getObjectInspectorOptions());
+   }
+
+   protected ObjectInspectorFactory.ObjectInspectorOptions getObjectInspectorOptions() {
+     return ObjectInspectorFactory.ObjectInspectorOptions.JAVA;
+   }
+
+   /** Does nothing. */
+   public void initialize(Configuration job, Properties tbl) throws SerDeException {
+   }
+
+   public Class<? extends Writable> getSerializedClass() {
+     return BytesWritable.class;
+   }
+   /** Not supported here: always throws a RuntimeException. */
+   public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+     throw new RuntimeException("not supported");
+   }
+ }
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/InspectableObject.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/InspectableObject.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/InspectableObject.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/InspectableObject.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+/**
+ * Simple wrapper of object with ObjectInspector.
+ *
+ * TODO: we need to redefine the hashCode and equals methods, so that
+ * it can be put into a HashMap as a key.
+ *
+ * This class also serves as a facility for a function that returns
+ * both an object and an ObjectInspector.
+ */
+public class InspectableObject {
+
+ public Object o;
+ public ObjectInspector oi;
+
+ public InspectableObject() {
+ this(null,null);
+ }
+ public InspectableObject(Object o, ObjectInspector oi) {
+ this.o = o;
+ this.oi = oi;
+ }
+
+
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.List;
+
+
+ public interface ListObjectInspector extends ObjectInspector {
+
+   // ** Methods that do not need a data object **
+
+   /** Returns the ObjectInspector for the elements of the list. */
+   ObjectInspector getListElementObjectInspector();
+
+   // ** Methods that need a data object **
+
+   /** Returns null for a null list or an out-of-the-range index. */
+   Object getListElement(Object data, int index);
+
+   /** Returns -1 when data is null. */
+   int getListLength(Object data);
+
+   /** Returns null when data is null. */
+   List<?> getList(Object data);
+
+ }
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MapObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MapObjectInspector.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MapObjectInspector.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MapObjectInspector.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.Map;
+
+
+ public interface MapObjectInspector extends ObjectInspector {
+
+   // ** Methods that do not need a data object **
+   /** Returns the ObjectInspector for the keys of the map. */
+   ObjectInspector getMapKeyObjectInspector();
+
+   /** Returns the ObjectInspector for the values of the map. */
+   ObjectInspector getMapValueObjectInspector();
+
+   // ** Methods that need a data object **
+
+   /**
+    * key has to be of the same structure as the Map expects. Usually key is a primitive;
+    * for a non-primitive key the user must define hashCode() and equals() of the key class.
+    */
+   Object getMapValueElement(Object data, Object key);
+
+   /** Returns null when data is null. */
+   Map<?,?> getMap(Object data);
+ }
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MetadataListStructObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MetadataListStructObjectInspector.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MetadataListStructObjectInspector.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MetadataListStructObjectInspector.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.ColumnSet;
+
+/**
+ * StructObjectInspector works on struct data that is stored as a Java List or Java Array object.
+ * Basically, the fields are stored sequentially in the List object.
+ *
+ * The names of the struct fields and the internal structure of the struct fields are specified in
+ * the ctor of the StructObjectInspector.
+ *
+ */
+ public class MetadataListStructObjectInspector extends StandardStructObjectInspector {
+
+   static HashMap<List<String>, MetadataListStructObjectInspector> cached
+       = new HashMap<List<String>, MetadataListStructObjectInspector>();
+   public static MetadataListStructObjectInspector getInstance(int fields) {
+     return getInstance(ObjectInspectorUtils.getIntegerArray(fields));
+   }
+
+   /** Returns the cached inspector for columnNames, creating and caching it on first use. */
+   public static MetadataListStructObjectInspector getInstance(List<String> columnNames) {
+     MetadataListStructObjectInspector inspector = cached.get(columnNames);
+     if (inspector != null) {
+       return inspector;
+     }
+     inspector = new MetadataListStructObjectInspector(columnNames);
+     cached.put(columnNames, inspector);
+     return inspector;
+   }
+
+   /** Builds a list holding one String primitive inspector per field. */
+   static ArrayList<ObjectInspector> getFieldObjectInspectors(int fields) {
+     ArrayList<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(fields);
+     for (int remaining = fields; remaining > 0; remaining--) {
+       inspectors.add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(String.class));
+     }
+     return inspectors;
+   }
+
+   MetadataListStructObjectInspector(List<String> columnNames) {
+     super(columnNames, getFieldObjectInspectors(columnNames.size()));
+   }
+
+   /** Delegates to the superclass, first replacing a ColumnSet by its col member. */
+   public Object getStructFieldData(Object data, StructField fieldRef) {
+     Object row = (data instanceof ColumnSet) ? ((ColumnSet)data).col : data;
+     return super.getStructFieldData(row, fieldRef);
+   }
+
+   /** Delegates to the superclass, first replacing a ColumnSet by its col member. */
+   public List<Object> getStructFieldsDataAsList(Object data) {
+     Object row = (data instanceof ColumnSet) ? ((ColumnSet)data).col : data;
+     return super.getStructFieldsDataAsList(row);
+   }
+ }
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+/**
+ * ObjectInspector helps us to look into the internal structure of a complex
+ * object.
+ *
+ * A (probably configured) ObjectInspector instance stands for a specific type
+ * and a specific way to store the data of that type in the memory.
+ *
+ * For native java Object, we can directly access the internal structure through
+ * member fields and methods. ObjectInspector is a way to delegate that functionality
+ * away from the Object, so that we have more control on the behavior of those actions.
+ *
+ * An efficient implementation of ObjectInspector should rely on factory, so that we can
+ * make sure the same ObjectInspector only has one instance. That also makes sure
+ * hashCode() and equals() methods of java.lang.Object work directly for ObjectInspector
+ * as well.
+ */
+ public interface ObjectInspector {
+
+   /** The categories a type can fall into. */
+   enum Category {
+     PRIMITIVE,
+     LIST,
+     MAP,
+     STRUCT
+   }
+
+   /**
+    * Returns the name of the inspected data type, for display to the user.
+    *
+    * Primitive types have standardized names. Other types may be named like
+    * "list<int>" or "map<int,string>", by a java class name, or by a
+    * user-defined name similar to a typedef.
+    */
+   String getTypeName();
+
+   /**
+    * Returns the category of the inspected type. The implementation must
+    * inherit from the interface matching that category: PrimitiveObjectInspector
+    * for PRIMITIVE, ListObjectInspector for LIST, MapObjectInspector for MAP,
+    * StructObjectInspector for STRUCT.
+    */
+   Category getCategory();
+ }
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,196 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.GenericArrayType;
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * ObjectInspectorFactory is the primary way to create new ObjectInspector instances.
+ *
+ * SerDe classes should call the static functions in this library to create an ObjectInspector
+ * to return to the caller of SerDe2.getObjectInspector().
+ */
+public class ObjectInspectorFactory {
+
+
+ /**
+ * ObjectInspectorOptions describes what ObjectInspector to use.
+ * JAVA is to use pure JAVA reflection. THRIFT is to use JAVA reflection and filter out __isset fields.
+ * New ObjectInspectorOptions can be added here when available.
+ *
+ * We choose to use a single HashMap objectInspectorCache to cache all situations for efficiency and code
+ * simplicity. And we don't expect a case that a user need to create 2 or more different types of
+ * ObjectInspectors for the same Java type.
+ */
+ public enum ObjectInspectorOptions {
+ JAVA,
+ THRIFT
+ };
+
+ private static HashMap<Type, ObjectInspector> objectInspectorCache = new HashMap<Type, ObjectInspector>();
+
+ public static ObjectInspector getReflectionObjectInspector(Type t, ObjectInspectorOptions options) {
+ ObjectInspector oi = objectInspectorCache.get(t);
+ if (oi == null) {
+ oi = getReflectionObjectInspectorNoCache(t, options);
+ objectInspectorCache.put(t, oi);
+ }
+ if ((options.equals(ObjectInspectorOptions.JAVA) && oi.getClass().equals(ThriftStructObjectInspector.class))
+ || (options.equals(ObjectInspectorOptions.THRIFT) && oi.getClass().equals(ReflectionStructObjectInspector.class))) {
+ throw new RuntimeException("Cannot call getObjectInspectorByReflection with both JAVA and THRIFT !");
+ }
+ return oi;
+ }
+
+ private static ObjectInspector getReflectionObjectInspectorNoCache(Type t, ObjectInspectorOptions options) {
+ if (t instanceof GenericArrayType) {
+ GenericArrayType at = (GenericArrayType)t;
+ return getStandardListObjectInspector(
+ getReflectionObjectInspector(at.getGenericComponentType(), options));
+ }
+
+ if (t instanceof ParameterizedType) {
+ ParameterizedType pt = (ParameterizedType)t;
+ // List?
+ if (List.class.isAssignableFrom((Class<?>)pt.getRawType())) {
+ return getStandardListObjectInspector(
+ getReflectionObjectInspector(pt.getActualTypeArguments()[0], options));
+ }
+ // Map?
+ if (Map.class.isAssignableFrom((Class<?>)pt.getRawType())) {
+ return getStandardMapObjectInspector(
+ getReflectionObjectInspector(pt.getActualTypeArguments()[0], options),
+ getReflectionObjectInspector(pt.getActualTypeArguments()[1], options));
+ }
+ // Otherwise convert t to RawType so we will fall into the following if block.
+ t = pt.getRawType();
+ }
+
+ // Must be a class.
+ if (!(t instanceof Class)) {
+ throw new RuntimeException(ObjectInspectorFactory.class.getName() + ": internal error.");
+ }
+ Class<?> c = (Class<?>)t;
+
+ // Primitive?
+ if (ObjectInspectorUtils.isPrimitiveClass(c)) {
+ return getStandardPrimitiveObjectInspector(c);
+ }
+
+ // Must be struct because List and Map need to be ParameterizedType
+ assert(!List.class.isAssignableFrom(c));
+ assert(!Map.class.isAssignableFrom(c));
+
+ // Create StructObjectInspector
+ ReflectionStructObjectInspector oi;
+ switch(options) {
+ case JAVA:
+ oi = new ReflectionStructObjectInspector();
+ break;
+ case THRIFT:
+ oi = new ThriftStructObjectInspector();
+ break;
+ default:
+ throw new RuntimeException(ObjectInspectorFactory.class.getName() + ": internal error.");
+ }
+ // put it into the cache BEFORE it is initialized to make sure we can catch recursive types.
+ objectInspectorCache.put(t, oi);
+ Field[] fields = c.getDeclaredFields();
+ ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(fields.length);
+ for(int i=0; i<fields.length; i++) {
+ if (!oi.shouldIgnoreField(fields[i].getName())) {
+ structFieldObjectInspectors.add(getReflectionObjectInspector(fields[i].getGenericType(), options));
+ }
+ }
+ oi.init(c, structFieldObjectInspectors);
+ return oi;
+ }
+
+
+ private static HashMap<Class<?>, StandardPrimitiveObjectInspector> cachedStandardPrimitiveInspectorCache = new HashMap<Class<?>, StandardPrimitiveObjectInspector>();
+ public static StandardPrimitiveObjectInspector getStandardPrimitiveObjectInspector(Class<?> c) {
+ c = ObjectInspectorUtils.generalizePrimitive(c);
+ StandardPrimitiveObjectInspector result = cachedStandardPrimitiveInspectorCache.get(c);
+ if (result == null) {
+ result = new StandardPrimitiveObjectInspector(c);
+ cachedStandardPrimitiveInspectorCache.put(c, result);
+ }
+ return result;
+ }
+
+ static HashMap<ObjectInspector, StandardListObjectInspector> cachedStandardListObjectInspector =
+ new HashMap<ObjectInspector, StandardListObjectInspector>();
+ public static StandardListObjectInspector getStandardListObjectInspector(ObjectInspector listElementObjectInspector) {
+ StandardListObjectInspector result = cachedStandardListObjectInspector.get(listElementObjectInspector);
+ if (result == null) {
+ result = new StandardListObjectInspector(listElementObjectInspector);
+ cachedStandardListObjectInspector.put(listElementObjectInspector, result);
+ }
+ return result;
+ }
+
+ static HashMap<List<ObjectInspector>, StandardMapObjectInspector> cachedStandardMapObjectInspector =
+ new HashMap<List<ObjectInspector>, StandardMapObjectInspector>();
+ public static StandardMapObjectInspector getStandardMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
+ ArrayList<ObjectInspector> signature = new ArrayList<ObjectInspector>(2);
+ signature.add(mapKeyObjectInspector);
+ signature.add(mapValueObjectInspector);
+ StandardMapObjectInspector result = cachedStandardMapObjectInspector.get(signature);
+ if (result == null) {
+ result = new StandardMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector);
+ cachedStandardMapObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ static HashMap<ArrayList<List<?>>, StandardStructObjectInspector> cachedStandardStructObjectInspector =
+ new HashMap<ArrayList<List<?>>, StandardStructObjectInspector>();
+ public static StandardStructObjectInspector getStandardStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+ ArrayList<List<?>> signature = new ArrayList<List<?>>();
+ signature.add(structFieldNames);
+ signature.add(structFieldObjectInspectors);
+ StandardStructObjectInspector result = cachedStandardStructObjectInspector.get(signature);
+ if (result == null) {
+ result = new StandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);
+ cachedStandardStructObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ static HashMap<List<StructObjectInspector>, UnionStructObjectInspector> cachedUnionStructObjectInspector =
+ new HashMap<List<StructObjectInspector>, UnionStructObjectInspector>();
+ public static UnionStructObjectInspector getUnionStructObjectInspector(List<StructObjectInspector> structObjectInspectors) {
+ UnionStructObjectInspector result = cachedUnionStructObjectInspector.get(structObjectInspectors);
+ if (result == null) {
+ result = new UnionStructObjectInspector(structObjectInspectors);
+ cachedUnionStructObjectInspector.put(structObjectInspectors, result);
+ }
+ return result;
+ }
+
+
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,225 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * ObjectInspectorUtils is a collection of static utility functions for working with ObjectInspectors.
+ *
+ * It can tell which classes are treated as primitive, and can convert an ObjectInspector, or an
+ * object described by one, to its standard ObjectInspector or standard object.
+ */
+public class ObjectInspectorUtils {
+
+  /**
+   * This function defines the list of PrimitiveClasses that we support.
+   * A PrimitiveClass should support java serialization/deserialization.
+   *
+   * @return true for String, Boolean, Character, java.sql.Date, any subclass of Number, and the
+   *         Java primitive types
+   */
+  public static boolean isPrimitiveClass(Class<?> c) {
+    if (c == String.class || c == Boolean.class
+        || c == Character.class || c == java.sql.Date.class) {
+      return true;
+    }
+    return java.lang.Number.class.isAssignableFrom(c) || c.isPrimitive();
+  }
+
+  /**
+   * Generalize the Java primitive types to the corresponding
+   * Java Classes.
+   *
+   * @return the wrapper class if primitiveClass is a Java primitive type (including void),
+   *         otherwise primitiveClass itself
+   */
+  public static Class<?> generalizePrimitive(Class<?> primitiveClass) {
+    if (primitiveClass == Boolean.TYPE) {
+      return Boolean.class;
+    } else if (primitiveClass == Byte.TYPE) {
+      return Byte.class;
+    } else if (primitiveClass == Character.TYPE) {
+      return Character.class;
+    } else if (primitiveClass == Short.TYPE) {
+      return Short.class;
+    } else if (primitiveClass == Integer.TYPE) {
+      return Integer.class;
+    } else if (primitiveClass == Long.TYPE) {
+      return Long.class;
+    } else if (primitiveClass == Float.TYPE) {
+      return Float.class;
+    } else if (primitiveClass == Double.TYPE) {
+      return Double.class;
+    } else if (primitiveClass == Void.TYPE) {
+      return Void.class;
+    }
+    return primitiveClass;
+  }
+
+
+  /** Cache for getIntegerArray(); the entry at index n holds the array of size n, or null. */
+  static ArrayList<ArrayList<String>> integerArrayCache = new ArrayList<ArrayList<String>>();
+
+  /**
+   * Returns an array of Integer strings, starting from "0".
+   * This function caches the arrays to provide a better performance; the same list instance is
+   * returned for every call with the same size.
+   */
+  public static ArrayList<String> getIntegerArray(int size) {
+    // Grow the cache so that index "size" exists.
+    while (integerArrayCache.size() <= size) {
+      integerArrayCache.add(null);
+    }
+    ArrayList<String> result = integerArrayCache.get(size);
+    if (result == null) {
+      result = new ArrayList<String>(size);
+      for (int i = 0; i < size; i++) {
+        result.add(String.valueOf(i));
+      }
+      integerArrayCache.set(size, result);
+    }
+    return result;
+  }
+
+  /** Cache for getIntegerCSV(); the entry at index n holds the string for size n, or null. */
+  static ArrayList<String> integerCSVCache = new ArrayList<String>();
+
+  /**
+   * Returns the integers from 0 to size-1 joined by commas, for example "0,1,2" for size 3.
+   * The strings are cached.
+   */
+  public static String getIntegerCSV(int size) {
+    // Grow the cache so that index "size" exists.
+    while (integerCSVCache.size() <= size) {
+      integerCSVCache.add(null);
+    }
+    String result = integerCSVCache.get(size);
+    if (result == null) {
+      StringBuilder sb = new StringBuilder();
+      for (int i = 0; i < size; i++) {
+        if (i > 0) {
+          sb.append(",");
+        }
+        sb.append(i);
+      }
+      result = sb.toString();
+      integerCSVCache.set(size, result);
+    }
+    return result;
+  }
+
+
+  /**
+   * Get the standard ObjectInspector for an ObjectInspector.
+   *
+   * The returned ObjectInspector can be used to inspect the standard object.
+   *
+   * The conversion is applied recursively to list element, map key, map value and struct field
+   * ObjectInspectors, mirroring the recursive conversion done by getStandardObject(). A primitive
+   * ObjectInspector is returned as is. Note that an ObjectInspector hierarchy that refers back to
+   * itself cannot be converted, because the recursion would not terminate.
+   *
+   * @throws RuntimeException if the category of oi is unknown
+   */
+  public static ObjectInspector getStandardObjectInspector(ObjectInspector oi) {
+    switch (oi.getCategory()) {
+      case PRIMITIVE: {
+        return (PrimitiveObjectInspector) oi;
+      }
+      case LIST: {
+        ListObjectInspector loi = (ListObjectInspector) oi;
+        return ObjectInspectorFactory.getStandardListObjectInspector(
+            getStandardObjectInspector(loi.getListElementObjectInspector()));
+      }
+      case MAP: {
+        MapObjectInspector moi = (MapObjectInspector) oi;
+        return ObjectInspectorFactory.getStandardMapObjectInspector(
+            getStandardObjectInspector(moi.getMapKeyObjectInspector()),
+            getStandardObjectInspector(moi.getMapValueObjectInspector()));
+      }
+      case STRUCT: {
+        StructObjectInspector soi = (StructObjectInspector) oi;
+        List<? extends StructField> fields = soi.getAllStructFieldRefs();
+        List<String> fieldNames = new ArrayList<String>(fields.size());
+        List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fields.size());
+        for (StructField f : fields) {
+          fieldNames.add(f.getFieldName());
+          fieldObjectInspectors.add(getStandardObjectInspector(f.getFieldObjectInspector()));
+        }
+        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors);
+      }
+      default: {
+        throw new RuntimeException("Unknown ObjectInspector category!");
+      }
+    }
+  }
+
+  // TODO: should return o if the ObjectInspector is a standard ObjectInspector hierarchy
+  // (all internal ObjectInspector needs to be standard ObjectInspectors)
+  /**
+   * Converts an object to its standard representation, recursively.
+   *
+   * A primitive is returned as is, a list becomes an ArrayList of standard objects, a map becomes
+   * a HashMap with standard keys and values, and a struct becomes an ArrayList holding the
+   * standard object of each field, in field order.
+   *
+   * @param o the object to convert; null is returned as null
+   * @param oi the ObjectInspector that describes o
+   * @throws RuntimeException if the category of oi is unknown
+   */
+  public static Object getStandardObject(Object o, ObjectInspector oi) {
+    if (o == null) {
+      return null;
+    }
+
+    switch (oi.getCategory()) {
+      case PRIMITIVE: {
+        return o;
+      }
+      case LIST: {
+        ListObjectInspector loi = (ListObjectInspector) oi;
+        int length = loi.getListLength(o);
+        ArrayList<Object> list = new ArrayList<Object>(length);
+        for (int i = 0; i < length; i++) {
+          Object element = loi.getListElement(o, i);
+          list.add(getStandardObject(element, loi.getListElementObjectInspector()));
+        }
+        return list;
+      }
+      case MAP: {
+        MapObjectInspector moi = (MapObjectInspector) oi;
+        HashMap<Object, Object> map = new HashMap<Object, Object>();
+        Map<? extends Object, ? extends Object> omap = moi.getMap(o);
+        for (Map.Entry<? extends Object, ? extends Object> entry : omap.entrySet()) {
+          Object standardKey = getStandardObject(entry.getKey(), moi.getMapKeyObjectInspector());
+          Object standardValue = getStandardObject(entry.getValue(), moi.getMapValueObjectInspector());
+          map.put(standardKey, standardValue);
+        }
+        return map;
+      }
+      case STRUCT: {
+        StructObjectInspector soi = (StructObjectInspector) oi;
+        List<? extends StructField> fields = soi.getAllStructFieldRefs();
+        ArrayList<Object> struct = new ArrayList<Object>(fields.size());
+        for (StructField f : fields) {
+          struct.add(getStandardObject(soi.getStructFieldData(o, f), f.getFieldObjectInspector()));
+        }
+        return struct;
+      }
+      default: {
+        throw new RuntimeException("Unknown ObjectInspector category!");
+      }
+    }
+  }
+
+  /**
+   * Builds the type name of a struct in the form "struct{name1:type1,name2:type2}", using the
+   * field names and the type names of the field ObjectInspectors.
+   */
+  public static String getStandardStructTypeName(StructObjectInspector soi) {
+    StringBuilder sb = new StringBuilder("struct{");
+    List<? extends StructField> fields = soi.getAllStructFieldRefs();
+    for (int i = 0; i < fields.size(); i++) {
+      StructField field = fields.get(i);
+      if (i > 0) {
+        sb.append(",");
+      }
+      sb.append(field.getFieldName())
+          .append(":")
+          .append(field.getFieldObjectInspector().getTypeName());
+    }
+    return sb.append("}").toString();
+  }
+
+  /**
+   * Finds the field with the given name in a list of struct fields.
+   *
+   * The requested name is converted to lower case before it is compared with the name of each
+   * field, so only fields whose names are already in lower case can be found.
+   *
+   * @throws RuntimeException if no field has the requested name
+   */
+  public static StructField getStandardStructFieldRef(String fieldName, List<? extends StructField> fields) {
+    fieldName = fieldName.toLowerCase();
+    for (StructField field : fields) {
+      if (field.getFieldName().equals(fieldName)) {
+        return field;
+      }
+    }
+    throw new RuntimeException("cannot find field " + fieldName + " from " + fields);
+  }
+
+}
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+public interface PrimitiveObjectInspector extends ObjectInspector {
+
+  /**
+   * Returns the class associated with this primitive ObjectInspector.
+   * NOTE(review): presumably the Java class of the inspected primitive values; confirm against
+   * the implementations.
+   */
+  Class<?> getPrimitiveClass();
+
+}