You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by tr...@apache.org on 2012/09/10 23:29:03 UTC
svn commit: r1383152 [8/27] - in /incubator/hcatalog/trunk: ./
hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/
hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/drivers/
hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/ s...
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java?rev=1383152&r1=1383151&r2=1383152&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java Mon Sep 10 23:28:55 2012
@@ -38,94 +38,94 @@ import org.slf4j.LoggerFactory;
*/
public class HCatRecordObjectInspectorFactory {
- private final static Logger LOG = LoggerFactory.getLogger(HCatRecordObjectInspectorFactory.class);
+ private final static Logger LOG = LoggerFactory.getLogger(HCatRecordObjectInspectorFactory.class);
- static HashMap<TypeInfo, HCatRecordObjectInspector> cachedHCatRecordObjectInspectors =
- new HashMap<TypeInfo, HCatRecordObjectInspector>();
- static HashMap<TypeInfo, ObjectInspector> cachedObjectInspectors =
- new HashMap<TypeInfo, ObjectInspector>();
-
- /**
- * Returns HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into
- * @param typeInfo Type definition for the record to look into
- * @return appropriate HCatRecordObjectInspector
- * @throws SerDeException
- */
- public static HCatRecordObjectInspector getHCatRecordObjectInspector(
- StructTypeInfo typeInfo) throws SerDeException {
- HCatRecordObjectInspector oi = cachedHCatRecordObjectInspectors.get(typeInfo);
- if (oi == null) {
-
- LOG.debug("Got asked for OI for {} [{} ]",typeInfo.getCategory(),typeInfo.getTypeName());
- switch (typeInfo.getCategory()) {
- case STRUCT :
- StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
- List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
- List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
- List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
- for (int i = 0; i < fieldTypeInfos.size(); i++) {
- fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
+ static HashMap<TypeInfo, HCatRecordObjectInspector> cachedHCatRecordObjectInspectors =
+ new HashMap<TypeInfo, HCatRecordObjectInspector>();
+ static HashMap<TypeInfo, ObjectInspector> cachedObjectInspectors =
+ new HashMap<TypeInfo, ObjectInspector>();
+
+ /**
+ * Returns HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into
+ * @param typeInfo Type definition for the record to look into
+ * @return appropriate HCatRecordObjectInspector
+ * @throws SerDeException
+ */
+ public static HCatRecordObjectInspector getHCatRecordObjectInspector(
+ StructTypeInfo typeInfo) throws SerDeException {
+ HCatRecordObjectInspector oi = cachedHCatRecordObjectInspectors.get(typeInfo);
+ if (oi == null) {
+
+ LOG.debug("Got asked for OI for {} [{} ]", typeInfo.getCategory(), typeInfo.getTypeName());
+ switch (typeInfo.getCategory()) {
+ case STRUCT:
+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+ List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+ fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
+ }
+ oi = new HCatRecordObjectInspector(fieldNames, fieldObjectInspectors);
+
+ break;
+ default:
+ // Hmm.. not good,
+ // the only type expected here is STRUCT, which maps to HCatRecord
+ // - anything else is an error, so reject it with a SerDeException.
+ throw new SerDeException("TypeInfo [" + typeInfo.getTypeName()
+ + "] was not of struct type - HCatRecord expected struct type, got ["
+ + typeInfo.getCategory().toString() + "]");
+ }
+ cachedHCatRecordObjectInspectors.put(typeInfo, oi);
}
- oi = new HCatRecordObjectInspector(fieldNames,fieldObjectInspectors);
-
- break;
- default:
- // Hmm.. not good,
- // the only type expected here is STRUCT, which maps to HCatRecord
- // - anything else is an error. Return null as the inspector.
- throw new SerDeException("TypeInfo ["+typeInfo.getTypeName()
- + "] was not of struct type - HCatRecord expected struct type, got ["
- + typeInfo.getCategory().toString()+"]");
- }
- cachedHCatRecordObjectInspectors.put(typeInfo, oi);
+ return oi;
}
- return oi;
- }
- public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) {
+ public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) {
- ObjectInspector oi = cachedObjectInspectors.get(typeInfo);
- if (oi == null){
+ ObjectInspector oi = cachedObjectInspectors.get(typeInfo);
+ if (oi == null) {
- LOG.debug("Got asked for OI for {}, [{}]",typeInfo.getCategory(), typeInfo.getTypeName());
- switch (typeInfo.getCategory()) {
- case PRIMITIVE:
- oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
- ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
- break;
- case STRUCT:
- StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
- List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
- List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
- List<ObjectInspector> fieldObjectInspectors =
- new ArrayList<ObjectInspector>(fieldTypeInfos.size());
- for (int i = 0; i < fieldTypeInfos.size(); i++) {
- fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
+ LOG.debug("Got asked for OI for {}, [{}]", typeInfo.getCategory(), typeInfo.getTypeName());
+ switch (typeInfo.getCategory()) {
+ case PRIMITIVE:
+ oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+ ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
+ break;
+ case STRUCT:
+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+ List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ List<ObjectInspector> fieldObjectInspectors =
+ new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+ fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
+ }
+ oi = ObjectInspectorFactory.getStandardStructObjectInspector(
+ fieldNames, fieldObjectInspectors
+ );
+ break;
+ case LIST:
+ ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo(
+ ((ListTypeInfo) typeInfo).getListElementTypeInfo());
+ oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector);
+ break;
+ case MAP:
+ ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo(
+ ((MapTypeInfo) typeInfo).getMapKeyTypeInfo());
+ ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo(
+ ((MapTypeInfo) typeInfo).getMapValueTypeInfo());
+ oi = ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector, valueObjectInspector);
+ break;
+ default:
+ oi = null;
+ }
+ cachedObjectInspectors.put(typeInfo, oi);
}
- oi = ObjectInspectorFactory.getStandardStructObjectInspector(
- fieldNames, fieldObjectInspectors
- );
- break;
- case LIST:
- ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo(
- ((ListTypeInfo)typeInfo).getListElementTypeInfo());
- oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector);
- break;
- case MAP:
- ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo(
- ((MapTypeInfo)typeInfo).getMapKeyTypeInfo());
- ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo(
- ((MapTypeInfo)typeInfo).getMapValueTypeInfo());
- oi = ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector,valueObjectInspector);
- break;
- default:
- oi = null;
- }
- cachedObjectInspectors.put(typeInfo, oi);
+ return oi;
}
- return oi;
- }
}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java?rev=1383152&r1=1383151&r2=1383152&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java Mon Sep 10 23:28:55 2012
@@ -54,254 +54,254 @@ public class HCatRecordSerDe implements
private static final Logger LOG = LoggerFactory.getLogger(HCatRecordSerDe.class);
- public HCatRecordSerDe() throws SerDeException{
- }
+ public HCatRecordSerDe() throws SerDeException {
+ }
- private List<String> columnNames;
- private List<TypeInfo> columnTypes;
- private StructTypeInfo rowTypeInfo;
-
- private HCatRecordObjectInspector cachedObjectInspector;
-
- @Override
- public void initialize(Configuration conf, Properties tbl)
- throws SerDeException {
-
- LOG.debug("Initializing HCatRecordSerDe");
- LOG.debug("props to serde: {}",tbl.entrySet());
-
- // Get column names and types
- String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
- String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
-
- // all table column names
- if (columnNameProperty.length() == 0) {
- columnNames = new ArrayList<String>();
- } else {
- columnNames = Arrays.asList(columnNameProperty.split(","));
- }
-
- // all column types
- if (columnTypeProperty.length() == 0) {
- columnTypes = new ArrayList<TypeInfo>();
- } else {
- columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
- }
-
-
- LOG.debug("columns: {} {}",columnNameProperty,columnNames);
- LOG.debug("types: {} {}", columnTypeProperty, columnTypes);
- assert (columnNames.size() == columnTypes.size());
-
- rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
- cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
- }
-
- public void initialize(HCatSchema hsch) throws SerDeException {
-
- LOG.debug("Initializing HCatRecordSerDe through HCatSchema {}." ,hsch);
-
- rowTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hsch.getSchemaAsTypeString());
- cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
-
- }
-
-
- /**
- * The purpose of a deserialize method is to turn a data blob
- * which is a writable representation of the data into an
- * object that can then be parsed using the appropriate
- * ObjectInspector. In this case, since HCatRecord is directly
- * already the Writable object, there's no extra work to be done
- * here. Most of the logic resides in the ObjectInspector to be
- * able to return values from within the HCatRecord to hive when
- * it wants it.
- */
- @Override
- public Object deserialize(Writable data) throws SerDeException {
- if (!(data instanceof HCatRecord)) {
- throw new SerDeException(getClass().getName() + ": expects HCatRecord!");
- }
-
- return (HCatRecord) data;
- }
-
- /**
- * The purpose of the serialize method is to turn an object-representation
- * with a provided ObjectInspector into a Writable format, which
- * the underlying layer can then use to write out.
- *
- * In this case, it means that Hive will call this method to convert
- * an object with appropriate objectinspectors that it knows about,
- * to write out a HCatRecord.
- */
- @Override
- public Writable serialize(Object obj, ObjectInspector objInspector)
- throws SerDeException {
- if (objInspector.getCategory() != Category.STRUCT) {
- throw new SerDeException(getClass().toString()
- + " can only serialize struct types, but we got: "
- + objInspector.getTypeName());
- }
- return new DefaultHCatRecord((List<Object>)serializeStruct(obj,(StructObjectInspector)objInspector));
- }
-
-
- /**
- * Return serialized HCatRecord from an underlying
- * object-representation, and readable by an ObjectInspector
- * @param obj : Underlying object-representation
- * @param soi : StructObjectInspector
- * @return HCatRecord
- */
- private static List<?> serializeStruct(Object obj, StructObjectInspector soi)
- throws SerDeException {
-
- List<? extends StructField> fields = soi.getAllStructFieldRefs();
- List<Object> list = soi.getStructFieldsDataAsList(obj);
-
- if (list == null){
- return null;
- }
-
- List<Object> l = new ArrayList<Object>(fields.size());
-
- if (fields != null){
- for (int i = 0; i < fields.size(); i++) {
-
- // Get the field objectInspector and the field object.
- ObjectInspector foi = fields.get(i).getFieldObjectInspector();
- Object f = list.get(i);
- Object res = serializeField(f, foi);
- l.add(i, res);
- }
- }
- return l;
- }
-
- /**
- * Return underlying Java Object from an object-representation
- * that is readable by a provided ObjectInspector.
- */
- public static Object serializeField(Object field, ObjectInspector fieldObjectInspector)
- throws SerDeException {
-
- Object res;
- if (fieldObjectInspector.getCategory() == Category.PRIMITIVE){
- if (field != null && field instanceof Boolean &&
- HCatContext.getInstance().getConf().getBoolean(
- HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER,
- HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT)) {
- res = ((Boolean) field) ? 1 : 0;
- } else if (field != null && field instanceof Short &&
- HCatContext.getInstance().getConf().getBoolean(
- HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
- HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) {
- res = new Integer((Short) field);
- } else if (field != null && field instanceof Byte &&
- HCatContext.getInstance().getConf().getBoolean(
- HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
- HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) {
- res = new Integer((Byte) field);
- } else {
- res = ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field);
- }
- } else if (fieldObjectInspector.getCategory() == Category.STRUCT){
- res = serializeStruct(field,(StructObjectInspector)fieldObjectInspector);
- } else if (fieldObjectInspector.getCategory() == Category.LIST){
- res = serializeList(field,(ListObjectInspector)fieldObjectInspector);
- } else if (fieldObjectInspector.getCategory() == Category.MAP){
- res = serializeMap(field,(MapObjectInspector)fieldObjectInspector);
- } else {
- throw new SerDeException(HCatRecordSerDe.class.toString()
- + " does not know what to do with fields of unknown category: "
- + fieldObjectInspector.getCategory() + " , type: " + fieldObjectInspector.getTypeName());
- }
- return res;
- }
-
- /**
- * Helper method to return underlying Java Map from
- * an object-representation that is readable by a provided
- * MapObjectInspector
- */
- private static Map<?,?> serializeMap(Object f, MapObjectInspector moi) throws SerDeException {
- ObjectInspector koi = moi.getMapKeyObjectInspector();
- ObjectInspector voi = moi.getMapValueObjectInspector();
- Map<Object,Object> m = new TreeMap<Object, Object>();
-
- Map<?, ?> readMap = moi.getMap(f);
- if (readMap == null) {
- return null;
- } else {
- for (Map.Entry<?, ?> entry: readMap.entrySet()) {
- m.put(serializeField(entry.getKey(),koi), serializeField(entry.getValue(),voi));
- }
- }
- return m;
- }
-
- private static List<?> serializeList(Object f, ListObjectInspector loi) throws SerDeException {
- List l = loi.getList(f);
- if (l == null){
- return null;
- }
-
- ObjectInspector eloi = loi.getListElementObjectInspector();
- if (eloi.getCategory() == Category.PRIMITIVE){
- List<Object> list = new ArrayList<Object>(l.size());
- for(int i = 0; i < l.size(); i++){
- list.add(((PrimitiveObjectInspector)eloi).getPrimitiveJavaObject(l.get(i)));
- }
- return list;
- } else if (eloi.getCategory() == Category.STRUCT){
- List<List<?>> list = new ArrayList<List<?>>(l.size());
- for (int i = 0 ; i < l.size() ; i++ ){
- list.add(serializeStruct(l.get(i), (StructObjectInspector) eloi));
- }
- return list;
- } else if (eloi.getCategory() == Category.LIST){
- List<List<?>> list = new ArrayList<List<?>>(l.size());
- for (int i = 0 ; i < l.size() ; i++ ){
- list.add(serializeList(l.get(i), (ListObjectInspector) eloi));
- }
- return list;
- } else if (eloi.getCategory() == Category.MAP){
- List<Map<?,?>> list = new ArrayList<Map<?,?>>(l.size());
- for (int i = 0 ; i < l.size() ; i++ ){
- list.add(serializeMap(l.get(i), (MapObjectInspector) eloi));
- }
- return list;
- } else {
- throw new SerDeException(HCatRecordSerDe.class.toString()
- + " does not know what to do with fields of unknown category: "
- + eloi.getCategory() + " , type: " + eloi.getTypeName());
- }
- }
-
-
- /**
- * Return an object inspector that can read through the object
- * that we return from deserialize(). To wit, that means we need
- * to return an ObjectInspector that can read HCatRecord, given
- * the type info for it during initialize(). This also means
- * that this method cannot and should not be called before initialize()
- */
- @Override
- public ObjectInspector getObjectInspector() throws SerDeException {
- return (ObjectInspector) cachedObjectInspector;
- }
-
- @Override
- public Class<? extends Writable> getSerializedClass() {
- return HCatRecord.class;
- }
-
- @Override
- public SerDeStats getSerDeStats() {
- // no support for statistics yet
- return null;
- }
+ private List<String> columnNames;
+ private List<TypeInfo> columnTypes;
+ private StructTypeInfo rowTypeInfo;
+
+ private HCatRecordObjectInspector cachedObjectInspector;
+
+ @Override
+ public void initialize(Configuration conf, Properties tbl)
+ throws SerDeException {
+
+ LOG.debug("Initializing HCatRecordSerDe");
+ LOG.debug("props to serde: {}", tbl.entrySet());
+
+ // Get column names and types
+ String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+ String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+
+ // all table column names
+ if (columnNameProperty.length() == 0) {
+ columnNames = new ArrayList<String>();
+ } else {
+ columnNames = Arrays.asList(columnNameProperty.split(","));
+ }
+
+ // all column types
+ if (columnTypeProperty.length() == 0) {
+ columnTypes = new ArrayList<TypeInfo>();
+ } else {
+ columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+
+
+ LOG.debug("columns: {} {}", columnNameProperty, columnNames);
+ LOG.debug("types: {} {}", columnTypeProperty, columnTypes);
+ assert (columnNames.size() == columnTypes.size());
+
+ rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+ cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
+ }
+
+ public void initialize(HCatSchema hsch) throws SerDeException {
+
+ LOG.debug("Initializing HCatRecordSerDe through HCatSchema {}.", hsch);
+
+ rowTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hsch.getSchemaAsTypeString());
+ cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
+
+ }
+
+
+ /**
+ * The purpose of a deserialize method is to turn a data blob
+ * which is a writable representation of the data into an
+ * object that can then be parsed using the appropriate
+ * ObjectInspector. In this case, since HCatRecord is directly
+ * already the Writable object, there's no extra work to be done
+ * here. Most of the logic resides in the ObjectInspector to be
+ * able to return values from within the HCatRecord to hive when
+ * it wants it.
+ */
+ @Override
+ public Object deserialize(Writable data) throws SerDeException {
+ if (!(data instanceof HCatRecord)) {
+ throw new SerDeException(getClass().getName() + ": expects HCatRecord!");
+ }
+
+ return (HCatRecord) data;
+ }
+
+ /**
+ * The purpose of the serialize method is to turn an object-representation
+ * with a provided ObjectInspector into a Writable format, which
+ * the underlying layer can then use to write out.
+ *
+ * In this case, it means that Hive will call this method to convert
+ * an object with appropriate objectinspectors that it knows about,
+ * to write out a HCatRecord.
+ */
+ @Override
+ public Writable serialize(Object obj, ObjectInspector objInspector)
+ throws SerDeException {
+ if (objInspector.getCategory() != Category.STRUCT) {
+ throw new SerDeException(getClass().toString()
+ + " can only serialize struct types, but we got: "
+ + objInspector.getTypeName());
+ }
+ return new DefaultHCatRecord((List<Object>) serializeStruct(obj, (StructObjectInspector) objInspector));
+ }
+
+
+ /**
+ * Return serialized HCatRecord from an underlying
+ * object-representation, and readable by an ObjectInspector
+ * @param obj : Underlying object-representation
+ * @param soi : StructObjectInspector
+ * @return list of serialized field values (the caller wraps it in an HCatRecord), or null if the struct data is null
+ */
+ private static List<?> serializeStruct(Object obj, StructObjectInspector soi)
+ throws SerDeException {
+
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+ List<Object> list = soi.getStructFieldsDataAsList(obj);
+
+ if (list == null) {
+ return null;
+ }
+
+ List<Object> l = new ArrayList<Object>(fields.size());
+
+ if (fields != null) {
+ for (int i = 0; i < fields.size(); i++) {
+
+ // Get the field objectInspector and the field object.
+ ObjectInspector foi = fields.get(i).getFieldObjectInspector();
+ Object f = list.get(i);
+ Object res = serializeField(f, foi);
+ l.add(i, res);
+ }
+ }
+ return l;
+ }
+
+ /**
+ * Return underlying Java Object from an object-representation
+ * that is readable by a provided ObjectInspector.
+ */
+ public static Object serializeField(Object field, ObjectInspector fieldObjectInspector)
+ throws SerDeException {
+
+ Object res;
+ if (fieldObjectInspector.getCategory() == Category.PRIMITIVE) {
+ if (field != null && field instanceof Boolean &&
+ HCatContext.getInstance().getConf().getBoolean(
+ HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER,
+ HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT)) {
+ res = ((Boolean) field) ? 1 : 0;
+ } else if (field != null && field instanceof Short &&
+ HCatContext.getInstance().getConf().getBoolean(
+ HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
+ HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) {
+ res = new Integer((Short) field);
+ } else if (field != null && field instanceof Byte &&
+ HCatContext.getInstance().getConf().getBoolean(
+ HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
+ HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) {
+ res = new Integer((Byte) field);
+ } else {
+ res = ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field);
+ }
+ } else if (fieldObjectInspector.getCategory() == Category.STRUCT) {
+ res = serializeStruct(field, (StructObjectInspector) fieldObjectInspector);
+ } else if (fieldObjectInspector.getCategory() == Category.LIST) {
+ res = serializeList(field, (ListObjectInspector) fieldObjectInspector);
+ } else if (fieldObjectInspector.getCategory() == Category.MAP) {
+ res = serializeMap(field, (MapObjectInspector) fieldObjectInspector);
+ } else {
+ throw new SerDeException(HCatRecordSerDe.class.toString()
+ + " does not know what to do with fields of unknown category: "
+ + fieldObjectInspector.getCategory() + " , type: " + fieldObjectInspector.getTypeName());
+ }
+ return res;
+ }
+
+ /**
+ * Helper method to return underlying Java Map from
+ * an object-representation that is readable by a provided
+ * MapObjectInspector
+ */
+ private static Map<?, ?> serializeMap(Object f, MapObjectInspector moi) throws SerDeException {
+ ObjectInspector koi = moi.getMapKeyObjectInspector();
+ ObjectInspector voi = moi.getMapValueObjectInspector();
+ Map<Object, Object> m = new TreeMap<Object, Object>();
+
+ Map<?, ?> readMap = moi.getMap(f);
+ if (readMap == null) {
+ return null;
+ } else {
+ for (Map.Entry<?, ?> entry : readMap.entrySet()) {
+ m.put(serializeField(entry.getKey(), koi), serializeField(entry.getValue(), voi));
+ }
+ }
+ return m;
+ }
+
+ private static List<?> serializeList(Object f, ListObjectInspector loi) throws SerDeException {
+ List l = loi.getList(f);
+ if (l == null) {
+ return null;
+ }
+
+ ObjectInspector eloi = loi.getListElementObjectInspector();
+ if (eloi.getCategory() == Category.PRIMITIVE) {
+ List<Object> list = new ArrayList<Object>(l.size());
+ for (int i = 0; i < l.size(); i++) {
+ list.add(((PrimitiveObjectInspector) eloi).getPrimitiveJavaObject(l.get(i)));
+ }
+ return list;
+ } else if (eloi.getCategory() == Category.STRUCT) {
+ List<List<?>> list = new ArrayList<List<?>>(l.size());
+ for (int i = 0; i < l.size(); i++) {
+ list.add(serializeStruct(l.get(i), (StructObjectInspector) eloi));
+ }
+ return list;
+ } else if (eloi.getCategory() == Category.LIST) {
+ List<List<?>> list = new ArrayList<List<?>>(l.size());
+ for (int i = 0; i < l.size(); i++) {
+ list.add(serializeList(l.get(i), (ListObjectInspector) eloi));
+ }
+ return list;
+ } else if (eloi.getCategory() == Category.MAP) {
+ List<Map<?, ?>> list = new ArrayList<Map<?, ?>>(l.size());
+ for (int i = 0; i < l.size(); i++) {
+ list.add(serializeMap(l.get(i), (MapObjectInspector) eloi));
+ }
+ return list;
+ } else {
+ throw new SerDeException(HCatRecordSerDe.class.toString()
+ + " does not know what to do with fields of unknown category: "
+ + eloi.getCategory() + " , type: " + eloi.getTypeName());
+ }
+ }
+
+
+ /**
+ * Return an object inspector that can read through the object
+ * that we return from deserialize(). To wit, that means we need
+ * to return an ObjectInspector that can read HCatRecord, given
+ * the type info for it during initialize(). This also means
+ * that this method cannot and should not be called before initialize()
+ */
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return (ObjectInspector) cachedObjectInspector;
+ }
+
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return HCatRecord.class;
+ }
+
+ @Override
+ public SerDeStats getSerDeStats() {
+ // no support for statistics yet
+ return null;
+ }
}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordable.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordable.java?rev=1383152&r1=1383151&r2=1383152&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordable.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordable.java Mon Sep 10 23:28:55 2012
@@ -26,30 +26,30 @@ import org.apache.hadoop.io.Writable;
*/
public interface HCatRecordable extends Writable {
- /**
- * Gets the field at the specified index.
- * @param fieldNum the field number
- * @return the object at the specified index
- */
- Object get(int fieldNum);
+ /**
+ * Gets the field at the specified index.
+ * @param fieldNum the field number
+ * @return the object at the specified index
+ */
+ Object get(int fieldNum);
- /**
- * Gets all the fields of the hcat record.
- * @return the list of fields
- */
- List<Object> getAll();
+ /**
+ * Gets all the fields of the hcat record.
+ * @return the list of fields
+ */
+ List<Object> getAll();
- /**
- * Sets the field at the specified index.
- * @param fieldNum the field number
- * @param value the value to set
- */
- void set(int fieldNum, Object value);
+ /**
+ * Sets the field at the specified index.
+ * @param fieldNum the field number
+ * @param value the value to set
+ */
+ void set(int fieldNum, Object value);
- /**
- * Gets the size of the hcat record.
- * @return the size
- */
- int size();
+ /**
+ * Gets the size of the hcat record.
+ * @return the size
+ */
+ int size();
}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java?rev=1383152&r1=1383151&r2=1383152&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java Mon Sep 10 23:28:55 2012
@@ -72,503 +72,503 @@ import org.slf4j.LoggerFactory;
public class JsonSerDe implements SerDe {
- private static final Logger LOG = LoggerFactory.getLogger(JsonSerDe.class);
- private List<String> columnNames;
- private List<TypeInfo> columnTypes;
-
- private StructTypeInfo rowTypeInfo;
- private HCatSchema schema;
-
- private JsonFactory jsonFactory = null;
-
- private HCatRecordObjectInspector cachedObjectInspector;
-
- @Override
- public void initialize(Configuration conf, Properties tbl)
- throws SerDeException {
-
-
- LOG.debug("Initializing JsonSerDe");
- LOG.debug("props to serde: {}",tbl.entrySet());
-
-
- // Get column names and types
- String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
- String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
-
- // all table column names
- if (columnNameProperty.length() == 0) {
- columnNames = new ArrayList<String>();
- } else {
- columnNames = Arrays.asList(columnNameProperty.split(","));
- }
-
- // all column types
- if (columnTypeProperty.length() == 0) {
- columnTypes = new ArrayList<TypeInfo>();
- } else {
- columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
- }
-
- LOG.debug("columns: {}, {}" , columnNameProperty, columnNames);
- LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes);
-
- assert (columnNames.size() == columnTypes.size());
-
- rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
-
- cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
- try {
- schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
- LOG.debug("schema : {}", schema);
- LOG.debug("fields : {}", schema.getFieldNames());
- } catch (HCatException e) {
- throw new SerDeException(e);
- }
-
- jsonFactory = new JsonFactory();
- }
-
- /**
- * Takes JSON string in Text form, and has to return an object representation above
- * it that's readable by the corresponding object inspector.
- *
- * For this implementation, since we're using the jackson parser, we can construct
- * our own object implementation, and we use HCatRecord for it
- */
- @Override
- public Object deserialize(Writable blob) throws SerDeException {
-
- Text t = (Text)blob;
- JsonParser p;
- List<Object> r = new ArrayList<Object>(Collections.nCopies(columnNames.size(), null));
- try {
- p = jsonFactory.createJsonParser(new ByteArrayInputStream((t.getBytes())));
- if (p.nextToken() != JsonToken.START_OBJECT) {
- throw new IOException("Start token not found where expected");
- }
- JsonToken token;
- while( ((token = p.nextToken()) != JsonToken.END_OBJECT)&&(token != null)){
- // iterate through each token, and create appropriate object here.
- populateRecord(r,token,p,schema);
- }
- } catch (JsonParseException e) {
- LOG.warn("Error [{}] parsing json text [{}].", e, t);
- LOG.debug(null,e);
- throw new SerDeException(e);
- } catch (IOException e) {
- LOG.warn("Error [{}] parsing json text [{}].", e, t);
- LOG.debug(null,e);
- throw new SerDeException(e);
- }
-
- return new DefaultHCatRecord(r);
- }
-
- private void populateRecord(List<Object> r, JsonToken token, JsonParser p, HCatSchema s) throws IOException {
- if (token != JsonToken.FIELD_NAME){
- throw new IOException("Field name expected");
- }
- String fieldName = p.getText();
- int fpos;
- try {
- fpos = s.getPosition(fieldName);
- } catch (NullPointerException npe){
- fpos = getPositionFromHiveInternalColumnName(fieldName);
- LOG.debug("NPE finding position for field [{}] in schema [{}]",fieldName,s);
- if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))){
- LOG.error("Hive internal column name {} and position "
- +"encoding {} for the column name are at odds",fieldName,fpos);
- throw npe;
- }
- if (fpos == -1){
- return; // unknown field, we return.
- }
- }
- HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
- Object currField = extractCurrentField(p, null, hcatFieldSchema,false);
- r.set(fpos,currField);
- }
-
- public String getHiveInternalColumnName(int fpos) {
- return HiveConf.getColumnInternalName(fpos);
- }
+ private static final Logger LOG = LoggerFactory.getLogger(JsonSerDe.class);
+ private List<String> columnNames;
+ private List<TypeInfo> columnTypes;
- public int getPositionFromHiveInternalColumnName(String internalName) {
+ private StructTypeInfo rowTypeInfo;
+ private HCatSchema schema;
+
+ private JsonFactory jsonFactory = null;
+
+ private HCatRecordObjectInspector cachedObjectInspector;
+
+ @Override
+ public void initialize(Configuration conf, Properties tbl)
+ throws SerDeException {
+
+
+ LOG.debug("Initializing JsonSerDe");
+ LOG.debug("props to serde: {}", tbl.entrySet());
+
+
+ // Get column names and types
+ String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+ String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+
+ // all table column names
+ if (columnNameProperty.length() == 0) {
+ columnNames = new ArrayList<String>();
+ } else {
+ columnNames = Arrays.asList(columnNameProperty.split(","));
+ }
+
+ // all column types
+ if (columnTypeProperty.length() == 0) {
+ columnTypes = new ArrayList<TypeInfo>();
+ } else {
+ columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+
+ LOG.debug("columns: {}, {}", columnNameProperty, columnNames);
+ LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes);
+
+ assert (columnNames.size() == columnTypes.size());
+
+ rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+
+ cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
+ try {
+ schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
+ LOG.debug("schema : {}", schema);
+ LOG.debug("fields : {}", schema.getFieldNames());
+ } catch (HCatException e) {
+ throw new SerDeException(e);
+ }
+
+ jsonFactory = new JsonFactory();
+ }
+
+ /**
+ * Takes JSON string in Text form, and has to return an object representation above
+ * it that's readable by the corresponding object inspector.
+ *
+ * For this implementation, since we're using the jackson parser, we can construct
+ * our own object implementation, and we use HCatRecord for it
+ */
+ @Override
+ public Object deserialize(Writable blob) throws SerDeException {
+
+ Text t = (Text) blob;
+ JsonParser p;
+ List<Object> r = new ArrayList<Object>(Collections.nCopies(columnNames.size(), null));
+ try {
+ p = jsonFactory.createJsonParser(new ByteArrayInputStream((t.getBytes())));
+ if (p.nextToken() != JsonToken.START_OBJECT) {
+ throw new IOException("Start token not found where expected");
+ }
+ JsonToken token;
+ while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) {
+ // iterate through each token, and create appropriate object here.
+ populateRecord(r, token, p, schema);
+ }
+ } catch (JsonParseException e) {
+ LOG.warn("Error [{}] parsing json text [{}].", e, t);
+ LOG.debug(null, e);
+ throw new SerDeException(e);
+ } catch (IOException e) {
+ LOG.warn("Error [{}] parsing json text [{}].", e, t);
+ LOG.debug(null, e);
+ throw new SerDeException(e);
+ }
+
+ return new DefaultHCatRecord(r);
+ }
+
+ private void populateRecord(List<Object> r, JsonToken token, JsonParser p, HCatSchema s) throws IOException {
+ if (token != JsonToken.FIELD_NAME) {
+ throw new IOException("Field name expected");
+ }
+ String fieldName = p.getText();
+ int fpos;
+ try {
+ fpos = s.getPosition(fieldName);
+ } catch (NullPointerException npe) {
+ fpos = getPositionFromHiveInternalColumnName(fieldName);
+ LOG.debug("NPE finding position for field [{}] in schema [{}]", fieldName, s);
+ if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) {
+ LOG.error("Hive internal column name {} and position "
+ + "encoding {} for the column name are at odds", fieldName, fpos);
+ throw npe;
+ }
+ if (fpos == -1) {
+ return; // unknown field, we return.
+ }
+ }
+ HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
+ Object currField = extractCurrentField(p, null, hcatFieldSchema, false);
+ r.set(fpos, currField);
+ }
+
+ public String getHiveInternalColumnName(int fpos) {
+ return HiveConf.getColumnInternalName(fpos);
+ }
+
+ public int getPositionFromHiveInternalColumnName(String internalName) {
// return HiveConf.getPositionFromInternalName(fieldName);
- // The above line should have been all the implementation that
- // we need, but due to a bug in that impl which recognizes
- // only single-digit columns, we need another impl here.
- Pattern internalPattern = Pattern.compile("_col([0-9]+)");
- Matcher m = internalPattern.matcher(internalName);
- if (!m.matches()){
- return -1;
- } else {
- return Integer.parseInt(m.group(1));
- }
- }
-
- /**
- * Utility method to extract current expected field from given JsonParser
- *
- * To get the field, we need either a type or a hcatFieldSchema(necessary for complex types)
- * It is possible that one of them can be null, and so, if so, the other is instantiated
- * from the other
- *
- * isTokenCurrent is a boolean variable also passed in, which determines
- * if the JsonParser is already at the token we expect to read next, or
- * needs advancing to the next before we read.
- */
- private Object extractCurrentField(JsonParser p, Type t,
- HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException, JsonParseException,
- HCatException {
- Object val = null;
- JsonToken valueToken;
- if (isTokenCurrent){
- valueToken = p.getCurrentToken();
- } else {
- valueToken = p.nextToken();
- }
-
- if (hcatFieldSchema != null){
- t = hcatFieldSchema.getType();
- }
- switch(t) {
- case INT:
- val = (valueToken == JsonToken.VALUE_NULL)?null:p.getIntValue();
- break;
- case TINYINT:
- val = (valueToken == JsonToken.VALUE_NULL)?null:p.getByteValue();
- break;
- case SMALLINT:
- val = (valueToken == JsonToken.VALUE_NULL)?null:p.getShortValue();
- break;
- case BIGINT:
- val = (valueToken == JsonToken.VALUE_NULL)?null:p.getLongValue();
- break;
- case BOOLEAN:
- String bval = (valueToken == JsonToken.VALUE_NULL)?null:p.getText();
- if (bval != null){
- val = Boolean.valueOf(bval);
- } else {
- val = null;
- }
- break;
- case FLOAT:
- val = (valueToken == JsonToken.VALUE_NULL)?null:p.getFloatValue();
- break;
- case DOUBLE:
- val = (valueToken == JsonToken.VALUE_NULL)?null:p.getDoubleValue();
- break;
- case STRING:
- val = (valueToken == JsonToken.VALUE_NULL)?null:p.getText();
- break;
- case BINARY:
- throw new IOException("JsonSerDe does not support BINARY type");
- case ARRAY:
- if (valueToken == JsonToken.VALUE_NULL){
- val = null;
- break;
- }
- if (valueToken != JsonToken.START_ARRAY){
- throw new IOException("Start of Array expected");
- }
- List<Object> arr = new ArrayList<Object>();
- while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) {
- arr.add(extractCurrentField(p, null,hcatFieldSchema.getArrayElementSchema().get(0),true));
- }
- val = arr;
- break;
- case MAP:
- if (valueToken == JsonToken.VALUE_NULL){
- val = null;
- break;
- }
- if (valueToken != JsonToken.START_OBJECT){
- throw new IOException("Start of Object expected");
- }
- Map<Object,Object> map = new LinkedHashMap<Object,Object>();
- Type keyType = hcatFieldSchema.getMapKeyType();
- HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
- while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
- Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(),keyType);
- Object v;
- if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT){
- v = extractCurrentField(p,null, valueSchema,false);
+ // The above line should have been all the implementation that
+ // we need, but due to a bug in that impl which recognizes
+ // only single-digit columns, we need another impl here.
+ Pattern internalPattern = Pattern.compile("_col([0-9]+)");
+ Matcher m = internalPattern.matcher(internalName);
+ if (!m.matches()) {
+ return -1;
+ } else {
+ return Integer.parseInt(m.group(1));
+ }
+ }
+
+ /**
+ * Utility method to extract current expected field from given JsonParser
+ *
+ * To get the field, we need either a type or a hcatFieldSchema(necessary for complex types)
+ * It is possible that one of them can be null, and so, if so, the other is instantiated
+ * from the other
+ *
+ * isTokenCurrent is a boolean variable also passed in, which determines
+ * if the JsonParser is already at the token we expect to read next, or
+ * needs advancing to the next before we read.
+ */
+ private Object extractCurrentField(JsonParser p, Type t,
+ HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException, JsonParseException,
+ HCatException {
+ Object val = null;
+ JsonToken valueToken;
+ if (isTokenCurrent) {
+ valueToken = p.getCurrentToken();
} else {
- v = extractCurrentField(p,null, valueSchema,true);
+ valueToken = p.nextToken();
}
- map.put(k, v);
- }
- val = map;
- break;
- case STRUCT:
- if (valueToken == JsonToken.VALUE_NULL){
- val = null;
- break;
- }
- if (valueToken != JsonToken.START_OBJECT){
- throw new IOException("Start of Object expected");
- }
- HCatSchema subSchema = hcatFieldSchema.getStructSubSchema();
- int sz = subSchema.getFieldNames().size();
-
- List<Object> struct = new ArrayList<Object>(Collections.nCopies(sz, null));
- while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
- populateRecord(struct, valueToken, p, subSchema);
- }
- val = struct;
- break;
- }
- return val;
- }
-
- private Object getObjectOfCorrespondingPrimitiveType(String s, Type t) throws IOException {
- switch(t) {
- case INT:
- return Integer.valueOf(s);
- case TINYINT:
- return Byte.valueOf(s);
- case SMALLINT:
- return Short.valueOf(s);
- case BIGINT:
- return Long.valueOf(s);
- case BOOLEAN:
- return (s.equalsIgnoreCase("true"));
- case FLOAT:
- return Float.valueOf(s);
- case DOUBLE:
- return Double.valueOf(s);
- case STRING:
- return s;
- case BINARY:
- throw new IOException("JsonSerDe does not support BINARY type");
- }
- throw new IOException("Could not convert from string to map type "+t);
- }
-
- /**
- * Given an object and object inspector pair, traverse the object
- * and generate a Text representation of the object.
- */
- @Override
- public Writable serialize(Object obj, ObjectInspector objInspector)
- throws SerDeException {
- StringBuilder sb = new StringBuilder();
- try {
-
- StructObjectInspector soi = (StructObjectInspector) objInspector;
- List<? extends StructField> structFields = soi.getAllStructFieldRefs();
- assert (columnNames.size() == structFields.size());
- if (obj == null) {
- sb.append("null");
- } else {
- sb.append(SerDeUtils.LBRACE);
- for (int i = 0; i < structFields.size(); i++) {
- if (i > 0) {
- sb.append(SerDeUtils.COMMA);
- }
- sb.append(SerDeUtils.QUOTE);
- sb.append(columnNames.get(i));
- sb.append(SerDeUtils.QUOTE);
- sb.append(SerDeUtils.COLON);
- buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)),
- structFields.get(i).getFieldObjectInspector());
- }
- sb.append(SerDeUtils.RBRACE);
- }
-
- } catch (IOException e) {
- LOG.warn("Error generating json text from object.", e);
- throw new SerDeException(e);
- }
- return new Text(sb.toString());
- }
-
- // TODO : code section copied over from SerDeUtils because of non-standard json production there
- // should use quotes for all field names. We should fix this there, and then remove this copy.
- // See http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES
- // for details - trying to enable Jackson to ignore that doesn't seem to work(compilation failure
- // when attempting to use that feature, so having to change the production itself.
- // Also, throws IOException when Binary is detected.
- private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException {
-
- switch (oi.getCategory()) {
- case PRIMITIVE: {
- PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
- if (o == null) {
- sb.append("null");
- } else {
- switch (poi.getPrimitiveCategory()) {
- case BOOLEAN: {
- boolean b = ((BooleanObjectInspector) poi).get(o);
- sb.append(b ? "true" : "false");
- break;
- }
- case BYTE: {
- sb.append(((ByteObjectInspector) poi).get(o));
- break;
- }
- case SHORT: {
- sb.append(((ShortObjectInspector) poi).get(o));
- break;
- }
- case INT: {
- sb.append(((IntObjectInspector) poi).get(o));
- break;
- }
- case LONG: {
- sb.append(((LongObjectInspector) poi).get(o));
- break;
- }
- case FLOAT: {
- sb.append(((FloatObjectInspector) poi).get(o));
- break;
- }
- case DOUBLE: {
- sb.append(((DoubleObjectInspector) poi).get(o));
- break;
- }
- case STRING: {
- sb.append('"');
- sb.append(SerDeUtils.escapeString(((StringObjectInspector) poi)
- .getPrimitiveJavaObject(o)));
- sb.append('"');
- break;
- }
- case TIMESTAMP: {
- sb.append('"');
- sb.append(((TimestampObjectInspector) poi)
- .getPrimitiveWritableObject(o));
- sb.append('"');
- break;
+ if (hcatFieldSchema != null) {
+ t = hcatFieldSchema.getType();
}
- case BINARY: {
- throw new IOException("JsonSerDe does not support BINARY type");
+ switch (t) {
+ case INT:
+ val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue();
+ break;
+ case TINYINT:
+ val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue();
+ break;
+ case SMALLINT:
+ val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue();
+ break;
+ case BIGINT:
+ val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue();
+ break;
+ case BOOLEAN:
+ String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
+ if (bval != null) {
+ val = Boolean.valueOf(bval);
+ } else {
+ val = null;
+ }
+ break;
+ case FLOAT:
+ val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue();
+ break;
+ case DOUBLE:
+ val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue();
+ break;
+ case STRING:
+ val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText();
+ break;
+ case BINARY:
+ throw new IOException("JsonSerDe does not support BINARY type");
+ case ARRAY:
+ if (valueToken == JsonToken.VALUE_NULL) {
+ val = null;
+ break;
+ }
+ if (valueToken != JsonToken.START_ARRAY) {
+ throw new IOException("Start of Array expected");
+ }
+ List<Object> arr = new ArrayList<Object>();
+ while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) {
+ arr.add(extractCurrentField(p, null, hcatFieldSchema.getArrayElementSchema().get(0), true));
+ }
+ val = arr;
+ break;
+ case MAP:
+ if (valueToken == JsonToken.VALUE_NULL) {
+ val = null;
+ break;
+ }
+ if (valueToken != JsonToken.START_OBJECT) {
+ throw new IOException("Start of Object expected");
+ }
+ Map<Object, Object> map = new LinkedHashMap<Object, Object>();
+ Type keyType = hcatFieldSchema.getMapKeyType();
+ HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
+ while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
+ Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), keyType);
+ Object v;
+ if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT) {
+ v = extractCurrentField(p, null, valueSchema, false);
+ } else {
+ v = extractCurrentField(p, null, valueSchema, true);
+ }
+
+ map.put(k, v);
+ }
+ val = map;
+ break;
+ case STRUCT:
+ if (valueToken == JsonToken.VALUE_NULL) {
+ val = null;
+ break;
+ }
+ if (valueToken != JsonToken.START_OBJECT) {
+ throw new IOException("Start of Object expected");
+ }
+ HCatSchema subSchema = hcatFieldSchema.getStructSubSchema();
+ int sz = subSchema.getFieldNames().size();
+
+ List<Object> struct = new ArrayList<Object>(Collections.nCopies(sz, null));
+ while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
+ populateRecord(struct, valueToken, p, subSchema);
+ }
+ val = struct;
+ break;
+ }
+ return val;
+ }
+
+ private Object getObjectOfCorrespondingPrimitiveType(String s, Type t) throws IOException {
+ switch (t) {
+ case INT:
+ return Integer.valueOf(s);
+ case TINYINT:
+ return Byte.valueOf(s);
+ case SMALLINT:
+ return Short.valueOf(s);
+ case BIGINT:
+ return Long.valueOf(s);
+ case BOOLEAN:
+ return (s.equalsIgnoreCase("true"));
+ case FLOAT:
+ return Float.valueOf(s);
+ case DOUBLE:
+ return Double.valueOf(s);
+ case STRING:
+ return s;
+ case BINARY:
+ throw new IOException("JsonSerDe does not support BINARY type");
+ }
+ throw new IOException("Could not convert from string to map type " + t);
+ }
+
+ /**
+ * Given an object and object inspector pair, traverse the object
+ * and generate a Text representation of the object.
+ */
+ @Override
+ public Writable serialize(Object obj, ObjectInspector objInspector)
+ throws SerDeException {
+ StringBuilder sb = new StringBuilder();
+ try {
+
+ StructObjectInspector soi = (StructObjectInspector) objInspector;
+ List<? extends StructField> structFields = soi.getAllStructFieldRefs();
+ assert (columnNames.size() == structFields.size());
+ if (obj == null) {
+ sb.append("null");
+ } else {
+ sb.append(SerDeUtils.LBRACE);
+ for (int i = 0; i < structFields.size(); i++) {
+ if (i > 0) {
+ sb.append(SerDeUtils.COMMA);
+ }
+ sb.append(SerDeUtils.QUOTE);
+ sb.append(columnNames.get(i));
+ sb.append(SerDeUtils.QUOTE);
+ sb.append(SerDeUtils.COLON);
+ buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)),
+ structFields.get(i).getFieldObjectInspector());
+ }
+ sb.append(SerDeUtils.RBRACE);
+ }
+
+ } catch (IOException e) {
+ LOG.warn("Error generating json text from object.", e);
+ throw new SerDeException(e);
+ }
+ return new Text(sb.toString());
+ }
+
+ // TODO : code section copied over from SerDeUtils because of non-standard json production there
+ // should use quotes for all field names. We should fix this there, and then remove this copy.
+ // See http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES
+ // for details - trying to enable Jackson to ignore that doesn't seem to work(compilation failure
+ // when attempting to use that feature, so having to change the production itself.
+ // Also, throws IOException when Binary is detected.
+ private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException {
+
+ switch (oi.getCategory()) {
+ case PRIMITIVE: {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+ if (o == null) {
+ sb.append("null");
+ } else {
+ switch (poi.getPrimitiveCategory()) {
+ case BOOLEAN: {
+ boolean b = ((BooleanObjectInspector) poi).get(o);
+ sb.append(b ? "true" : "false");
+ break;
+ }
+ case BYTE: {
+ sb.append(((ByteObjectInspector) poi).get(o));
+ break;
+ }
+ case SHORT: {
+ sb.append(((ShortObjectInspector) poi).get(o));
+ break;
+ }
+ case INT: {
+ sb.append(((IntObjectInspector) poi).get(o));
+ break;
+ }
+ case LONG: {
+ sb.append(((LongObjectInspector) poi).get(o));
+ break;
+ }
+ case FLOAT: {
+ sb.append(((FloatObjectInspector) poi).get(o));
+ break;
+ }
+ case DOUBLE: {
+ sb.append(((DoubleObjectInspector) poi).get(o));
+ break;
+ }
+ case STRING: {
+ sb.append('"');
+ sb.append(SerDeUtils.escapeString(((StringObjectInspector) poi)
+ .getPrimitiveJavaObject(o)));
+ sb.append('"');
+ break;
+ }
+ case TIMESTAMP: {
+ sb.append('"');
+ sb.append(((TimestampObjectInspector) poi)
+ .getPrimitiveWritableObject(o));
+ sb.append('"');
+ break;
+ }
+ case BINARY: {
+ throw new IOException("JsonSerDe does not support BINARY type");
+ }
+ default:
+ throw new RuntimeException("Unknown primitive type: "
+ + poi.getPrimitiveCategory());
+ }
+ }
+ break;
+ }
+ case LIST: {
+ ListObjectInspector loi = (ListObjectInspector) oi;
+ ObjectInspector listElementObjectInspector = loi
+ .getListElementObjectInspector();
+ List<?> olist = loi.getList(o);
+ if (olist == null) {
+ sb.append("null");
+ } else {
+ sb.append(SerDeUtils.LBRACKET);
+ for (int i = 0; i < olist.size(); i++) {
+ if (i > 0) {
+ sb.append(SerDeUtils.COMMA);
+ }
+ buildJSONString(sb, olist.get(i), listElementObjectInspector);
+ }
+ sb.append(SerDeUtils.RBRACKET);
+ }
+ break;
+ }
+ case MAP: {
+ MapObjectInspector moi = (MapObjectInspector) oi;
+ ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
+ ObjectInspector mapValueObjectInspector = moi
+ .getMapValueObjectInspector();
+ Map<?, ?> omap = moi.getMap(o);
+ if (omap == null) {
+ sb.append("null");
+ } else {
+ sb.append(SerDeUtils.LBRACE);
+ boolean first = true;
+ for (Object entry : omap.entrySet()) {
+ if (first) {
+ first = false;
+ } else {
+ sb.append(SerDeUtils.COMMA);
+ }
+ Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
+ StringBuilder keyBuilder = new StringBuilder();
+ buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector);
+ String keyString = keyBuilder.toString().trim();
+ boolean doQuoting = (!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE);
+ if (doQuoting) {
+ sb.append(SerDeUtils.QUOTE);
+ }
+ sb.append(keyString);
+ if (doQuoting) {
+ sb.append(SerDeUtils.QUOTE);
+ }
+ sb.append(SerDeUtils.COLON);
+ buildJSONString(sb, e.getValue(), mapValueObjectInspector);
+ }
+ sb.append(SerDeUtils.RBRACE);
+ }
+ break;
+ }
+ case STRUCT: {
+ StructObjectInspector soi = (StructObjectInspector) oi;
+ List<? extends StructField> structFields = soi.getAllStructFieldRefs();
+ if (o == null) {
+ sb.append("null");
+ } else {
+ sb.append(SerDeUtils.LBRACE);
+ for (int i = 0; i < structFields.size(); i++) {
+ if (i > 0) {
+ sb.append(SerDeUtils.COMMA);
+ }
+ sb.append(SerDeUtils.QUOTE);
+ sb.append(structFields.get(i).getFieldName());
+ sb.append(SerDeUtils.QUOTE);
+ sb.append(SerDeUtils.COLON);
+ buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)),
+ structFields.get(i).getFieldObjectInspector());
+ }
+ sb.append(SerDeUtils.RBRACE);
+ }
+ break;
+ }
+ case UNION: {
+ UnionObjectInspector uoi = (UnionObjectInspector) oi;
+ if (o == null) {
+ sb.append("null");
+ } else {
+ sb.append(SerDeUtils.LBRACE);
+ sb.append(uoi.getTag(o));
+ sb.append(SerDeUtils.COLON);
+ buildJSONString(sb, uoi.getField(o),
+ uoi.getObjectInspectors().get(uoi.getTag(o)));
+ sb.append(SerDeUtils.RBRACE);
+ }
+ break;
}
default:
- throw new RuntimeException("Unknown primitive type: "
- + poi.getPrimitiveCategory());
+ throw new RuntimeException("Unknown type in ObjectInspector!");
}
- }
- break;
}
- case LIST: {
- ListObjectInspector loi = (ListObjectInspector) oi;
- ObjectInspector listElementObjectInspector = loi
- .getListElementObjectInspector();
- List<?> olist = loi.getList(o);
- if (olist == null) {
- sb.append("null");
- } else {
- sb.append(SerDeUtils.LBRACKET);
- for (int i = 0; i < olist.size(); i++) {
- if (i > 0) {
- sb.append(SerDeUtils.COMMA);
- }
- buildJSONString(sb, olist.get(i), listElementObjectInspector);
- }
- sb.append(SerDeUtils.RBRACKET);
- }
- break;
- }
- case MAP: {
- MapObjectInspector moi = (MapObjectInspector) oi;
- ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
- ObjectInspector mapValueObjectInspector = moi
- .getMapValueObjectInspector();
- Map<?, ?> omap = moi.getMap(o);
- if (omap == null) {
- sb.append("null");
- } else {
- sb.append(SerDeUtils.LBRACE);
- boolean first = true;
- for (Object entry : omap.entrySet()) {
- if (first) {
- first = false;
- } else {
- sb.append(SerDeUtils.COMMA);
- }
- Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
- StringBuilder keyBuilder = new StringBuilder();
- buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector);
- String keyString = keyBuilder.toString().trim();
- boolean doQuoting = (!keyString.isEmpty()) && (keyString.charAt(0)!= SerDeUtils.QUOTE);
- if (doQuoting ){
- sb.append(SerDeUtils.QUOTE);
- }
- sb.append(keyString);
- if (doQuoting ){
- sb.append(SerDeUtils.QUOTE);
- }
- sb.append(SerDeUtils.COLON);
- buildJSONString(sb, e.getValue(), mapValueObjectInspector);
- }
- sb.append(SerDeUtils.RBRACE);
- }
- break;
- }
- case STRUCT: {
- StructObjectInspector soi = (StructObjectInspector) oi;
- List<? extends StructField> structFields = soi.getAllStructFieldRefs();
- if (o == null) {
- sb.append("null");
- } else {
- sb.append(SerDeUtils.LBRACE);
- for (int i = 0; i < structFields.size(); i++) {
- if (i > 0) {
- sb.append(SerDeUtils.COMMA);
- }
- sb.append(SerDeUtils.QUOTE);
- sb.append(structFields.get(i).getFieldName());
- sb.append(SerDeUtils.QUOTE);
- sb.append(SerDeUtils.COLON);
- buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)),
- structFields.get(i).getFieldObjectInspector());
- }
- sb.append(SerDeUtils.RBRACE);
- }
- break;
- }
- case UNION: {
- UnionObjectInspector uoi = (UnionObjectInspector) oi;
- if (o == null) {
- sb.append("null");
- } else {
- sb.append(SerDeUtils.LBRACE);
- sb.append(uoi.getTag(o));
- sb.append(SerDeUtils.COLON);
- buildJSONString(sb, uoi.getField(o),
- uoi.getObjectInspectors().get(uoi.getTag(o)));
- sb.append(SerDeUtils.RBRACE);
- }
- break;
- }
- default:
- throw new RuntimeException("Unknown type in ObjectInspector!");
- }
- }
-
-
- /**
- * Returns an object inspector for the specified schema that
- * is capable of reading in the object representation of the JSON string
- */
- @Override
- public ObjectInspector getObjectInspector() throws SerDeException {
- return cachedObjectInspector;
- }
-
- @Override
- public Class<? extends Writable> getSerializedClass() {
- return Text.class;
- }
-
- @Override
- public SerDeStats getSerDeStats() {
- // no support for statistics yet
- return null;
- }
+
+
+ /**
+ * Returns an object inspector for the specified schema that
+ * is capable of reading in the object representation of the JSON string
+ */
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return cachedObjectInspector;
+ }
+
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return Text.class;
+ }
+
+ @Override
+ public SerDeStats getSerDeStats() {
+ // no support for statistics yet
+ return null;
+ }
}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/LazyHCatRecord.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/LazyHCatRecord.java?rev=1383152&r1=1383151&r2=1383152&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/LazyHCatRecord.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/LazyHCatRecord.java Mon Sep 10 23:28:55 2012
@@ -40,110 +40,106 @@ import org.slf4j.LoggerFactory;
*/
public class LazyHCatRecord extends HCatRecord {
- public static final Logger LOG = LoggerFactory.getLogger(LazyHCatRecord.class.getName());
+ public static final Logger LOG = LoggerFactory.getLogger(LazyHCatRecord.class.getName());
- private Object wrappedObject;
- private StructObjectInspector soi;
-
- @Override
- public Object get(int fieldNum) {
- try {
- StructField fref = soi.getAllStructFieldRefs().get(fieldNum);
- return HCatRecordSerDe.serializeField(
- soi.getStructFieldData(wrappedObject, fref),
- fref.getFieldObjectInspector());
- } catch (SerDeException e) {
- throw new IllegalStateException("SerDe Exception deserializing",e);
- }
- }
-
-
- @Override
- public List<Object> getAll() {
-
- List<Object> r = new ArrayList<Object>(this.size());
- for (int i = 0; i < this.size(); i++){
- r.add(i, get(i));
- }
- return r;
- }
-
- @Override
- public void set(int fieldNum, Object value) {
- throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord");
- }
-
- @Override
- public int size() {
- return soi.getAllStructFieldRefs().size();
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap"
- + " an object/object inspector as a HCatRecord "
- + "- it does not need to be read from DataInput.");
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap"
- + " an object/object inspector as a HCatRecord "
- + "- it does not need to be written to a DataOutput.");
- }
-
- @Override
- public Object get(String fieldName, HCatSchema recordSchema)
- throws HCatException {
- int idx = recordSchema.getPosition(fieldName);
- return get(idx);
- }
-
- @Override
- public void set(String fieldName, HCatSchema recordSchema, Object value)
- throws HCatException {
- throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord");
- }
-
- @Override
- public void remove(int idx) throws HCatException {
- throw new UnsupportedOperationException("not allowed to run remove() on LazyHCatRecord");
- }
-
- @Override
- public void copy(HCatRecord r) throws HCatException {
- throw new UnsupportedOperationException("not allowed to run copy() on LazyHCatRecord");
- }
-
- public LazyHCatRecord(Object wrappedObject, ObjectInspector oi) throws Exception {
- if (oi.getCategory() != Category.STRUCT) {
- throw new SerDeException(getClass().toString() + " can only make a lazy hcat record from " +
- "objects of struct types, but we got: " + oi.getTypeName());
- }
-
- this.soi = (StructObjectInspector)oi;
- this.wrappedObject = wrappedObject;
- }
-
- @Override
- public String toString(){
- StringBuilder sb = new StringBuilder();
- for(int i = 0; i< size() ; i++) {
- sb.append(get(i)+"\t");
- }
- return sb.toString();
- }
-
- /**
- * Convert this LazyHCatRecord to a DefaultHCatRecord. This is required
- * before you can write out a record via write.
- * @return an HCatRecord that can be serialized
- * @throws HCatException
- */
- public HCatRecord getWritable() throws HCatException {
- DefaultHCatRecord d = new DefaultHCatRecord();
- d.copy(this);
- return d;
- }
+ private Object wrappedObject;
+ private StructObjectInspector soi;
+ @Override
+ public Object get(int fieldNum) {
+ try {
+ StructField fref = soi.getAllStructFieldRefs().get(fieldNum);
+ return HCatRecordSerDe.serializeField(
+ soi.getStructFieldData(wrappedObject, fref),
+ fref.getFieldObjectInspector());
+ } catch (SerDeException e) {
+ throw new IllegalStateException("SerDe Exception deserializing",e);
+ }
+ }
+
+ @Override
+ public List<Object> getAll() {
+ List<Object> r = new ArrayList<Object>(this.size());
+ for (int i = 0; i < this.size(); i++){
+ r.add(i, get(i));
+ }
+ return r;
+ }
+
+ @Override
+ public void set(int fieldNum, Object value) {
+ throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord");
+ }
+
+ @Override
+ public int size() {
+ return soi.getAllStructFieldRefs().size();
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap"
+ + " an object/object inspector as a HCatRecord "
+ + "- it does not need to be read from DataInput.");
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap"
+ + " an object/object inspector as a HCatRecord "
+ + "- it does not need to be written to a DataOutput.");
+ }
+
+ @Override
+ public Object get(String fieldName, HCatSchema recordSchema) throws HCatException {
+ int idx = recordSchema.getPosition(fieldName);
+ return get(idx);
+ }
+
+ @Override
+ public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException {
+ throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord");
+ }
+
+ @Override
+ public void remove(int idx) throws HCatException {
+ throw new UnsupportedOperationException("not allowed to run remove() on LazyHCatRecord");
+ }
+
+ @Override
+ public void copy(HCatRecord r) throws HCatException {
+ throw new UnsupportedOperationException("not allowed to run copy() on LazyHCatRecord");
+ }
+
+ public LazyHCatRecord(Object wrappedObject, ObjectInspector oi) throws Exception {
+ if (oi.getCategory() != Category.STRUCT) {
+ throw new SerDeException(getClass().toString() +
+ " can only make a lazy hcat record from " +
+ "objects of struct types, but we got: " + oi.getTypeName());
+ }
+
+ this.soi = (StructObjectInspector)oi;
+ this.wrappedObject = wrappedObject;
+ }
+
+ @Override
+ public String toString(){
+ StringBuilder sb = new StringBuilder();
+ for(int i = 0; i< size() ; i++) {
+ sb.append(get(i)+"\t");
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Convert this LazyHCatRecord to a DefaultHCatRecord. This is required
+ * before you can write out a record via write.
+ * @return an HCatRecord that can be serialized
+ * @throws HCatException
+ */
+ public HCatRecord getWritable() throws HCatException {
+ DefaultHCatRecord d = new DefaultHCatRecord();
+ d.copy(this);
+ return d;
+ }
}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/Pair.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/Pair.java?rev=1383152&r1=1383151&r2=1383152&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/Pair.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/Pair.java Mon Sep 10 23:28:55 2012
@@ -42,44 +42,44 @@ public class Pair<T, U> implements Seria
*/
@Override
public String toString() {
- return "[" + first.toString() +"," + second.toString() + "]";
+ return "[" + first.toString() + "," + second.toString() + "]";
}
@Override
public int hashCode() {
return (((this.first == null ? 1 : this.first.hashCode()) * 17)
- + (this.second == null ? 1 : this.second.hashCode()) * 19);
+ + (this.second == null ? 1 : this.second.hashCode()) * 19);
}
@Override
public boolean equals(Object other) {
- if(other == null) {
+ if (other == null) {
return false;
}
- if(! (other instanceof Pair)) {
+ if (!(other instanceof Pair)) {
return false;
}
Pair otherPair = (Pair) other;
- if(this.first == null) {
- if(otherPair.first != null) {
+ if (this.first == null) {
+ if (otherPair.first != null) {
return false;
} else {
return true;
}
}
- if(this.second == null) {
- if(otherPair.second != null) {
+ if (this.second == null) {
+ if (otherPair.second != null) {
return false;
} else {
return true;
}
}
- if(this.first.equals(otherPair.first) && this.second.equals(otherPair.second)) {
+ if (this.first.equals(otherPair.first) && this.second.equals(otherPair.second)) {
return true;
} else {
return false;
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/ReaderWriter.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/ReaderWriter.java?rev=1383152&r1=1383151&r2=1383152&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/ReaderWriter.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/ReaderWriter.java Mon Sep 10 23:28:55 2012
@@ -34,158 +34,158 @@ import org.apache.hadoop.io.VLongWritabl
public abstract class ReaderWriter {
- private static final String UTF8 = "UTF-8";
+ private static final String UTF8 = "UTF-8";
- public static Object readDatum(DataInput in) throws IOException {
+ public static Object readDatum(DataInput in) throws IOException {
- byte type = in.readByte();
- switch (type) {
+ byte type = in.readByte();
+ switch (type) {
- case DataType.STRING:
- byte[] buffer = new byte[in.readInt()];
- in.readFully(buffer);
- return new String(buffer,UTF8);
-
- case DataType.INTEGER:
- VIntWritable vint = new VIntWritable();
- vint.readFields(in);
- return vint.get();
-
- case DataType.LONG:
- VLongWritable vlong = new VLongWritable();
- vlong.readFields(in);
- return vlong.get();
-
- case DataType.FLOAT:
- return in.readFloat();
-
- case DataType.DOUBLE:
- return in.readDouble();
-
- case DataType.BOOLEAN:
- return in.readBoolean();
-
- case DataType.BYTE:
- return in.readByte();
-
- case DataType.SHORT:
- return in.readShort();
-
- case DataType.NULL:
- return null;
-
- case DataType.BINARY:
- int len = in.readInt();
- byte[] ba = new byte[len];
- in.readFully(ba);
- return ba;
-
- case DataType.MAP:
- int size = in.readInt();
- Map<Object,Object> m = new HashMap<Object, Object>(size);
- for (int i = 0; i < size; i++) {
- m.put(readDatum(in), readDatum(in));
- }
- return m;
-
- case DataType.LIST:
- int sz = in.readInt();
- List<Object> list = new ArrayList<Object>(sz);
- for(int i=0; i < sz; i++) {
- list.add(readDatum(in));
- }
- return list;
-
- default:
- throw new IOException("Unexpected data type " + type +
- " found in stream.");
+ case DataType.STRING:
+ byte[] buffer = new byte[in.readInt()];
+ in.readFully(buffer);
+ return new String(buffer, UTF8);
+
+ case DataType.INTEGER:
+ VIntWritable vint = new VIntWritable();
+ vint.readFields(in);
+ return vint.get();
+
+ case DataType.LONG:
+ VLongWritable vlong = new VLongWritable();
+ vlong.readFields(in);
+ return vlong.get();
+
+ case DataType.FLOAT:
+ return in.readFloat();
+
+ case DataType.DOUBLE:
+ return in.readDouble();
+
+ case DataType.BOOLEAN:
+ return in.readBoolean();
+
+ case DataType.BYTE:
+ return in.readByte();
+
+ case DataType.SHORT:
+ return in.readShort();
+
+ case DataType.NULL:
+ return null;
+
+ case DataType.BINARY:
+ int len = in.readInt();
+ byte[] ba = new byte[len];
+ in.readFully(ba);
+ return ba;
+
+ case DataType.MAP:
+ int size = in.readInt();
+ Map<Object, Object> m = new HashMap<Object, Object>(size);
+ for (int i = 0; i < size; i++) {
+ m.put(readDatum(in), readDatum(in));
+ }
+ return m;
+
+ case DataType.LIST:
+ int sz = in.readInt();
+ List<Object> list = new ArrayList<Object>(sz);
+ for (int i = 0; i < sz; i++) {
+ list.add(readDatum(in));
+ }
+ return list;
+
+ default:
+ throw new IOException("Unexpected data type " + type +
+ " found in stream.");
+ }
}
- }
- public static void writeDatum(DataOutput out, Object val) throws IOException {
- // write the data type
- byte type = DataType.findType(val);
- switch (type) {
- case DataType.LIST:
- out.writeByte(DataType.LIST);
- List<?> list = (List<?>)val;
- int sz = list.size();
- out.writeInt(sz);
- for (int i = 0; i < sz; i++) {
- writeDatum(out, list.get(i));
- }
- return;
-
- case DataType.MAP:
- out.writeByte(DataType.MAP);
- Map<?,?> m = (Map<?, ?>)val;
- out.writeInt(m.size());
- Iterator<?> i =
- m.entrySet().iterator();
- while (i.hasNext()) {
- Entry<?,?> entry = (Entry<?, ?>) i.next();
- writeDatum(out, entry.getKey());
- writeDatum(out, entry.getValue());
- }
- return;
-
- case DataType.INTEGER:
- out.writeByte(DataType.INTEGER);
- new VIntWritable((Integer)val).write(out);
- return;
-
- case DataType.LONG:
- out.writeByte(DataType.LONG);
- new VLongWritable((Long)val).write(out);
- return;
-
- case DataType.FLOAT:
- out.writeByte(DataType.FLOAT);
- out.writeFloat((Float)val);
- return;
-
- case DataType.DOUBLE:
- out.writeByte(DataType.DOUBLE);
- out.writeDouble((Double)val);
- return;
-
- case DataType.BOOLEAN:
- out.writeByte(DataType.BOOLEAN);
- out.writeBoolean((Boolean)val);
- return;
-
- case DataType.BYTE:
- out.writeByte(DataType.BYTE);
- out.writeByte((Byte)val);
- return;
-
- case DataType.SHORT:
- out.writeByte(DataType.SHORT);
- out.writeShort((Short)val);
- return;
-
- case DataType.STRING:
- String s = (String)val;
- byte[] utfBytes = s.getBytes(ReaderWriter.UTF8);
- out.writeByte(DataType.STRING);
- out.writeInt(utfBytes.length);
- out.write(utfBytes);
- return;
-
- case DataType.BINARY:
- byte[] ba = (byte[])val;
- out.writeByte(DataType.BINARY);
- out.writeInt(ba.length);
- out.write(ba);
- return;
-
- case DataType.NULL:
- out.writeByte(DataType.NULL);
- return;
-
- default:
- throw new IOException("Unexpected data type " + type +
- " found in stream.");
+ public static void writeDatum(DataOutput out, Object val) throws IOException {
+ // write the data type
+ byte type = DataType.findType(val);
+ switch (type) {
+ case DataType.LIST:
+ out.writeByte(DataType.LIST);
+ List<?> list = (List<?>) val;
+ int sz = list.size();
+ out.writeInt(sz);
+ for (int i = 0; i < sz; i++) {
+ writeDatum(out, list.get(i));
+ }
+ return;
+
+ case DataType.MAP:
+ out.writeByte(DataType.MAP);
+ Map<?, ?> m = (Map<?, ?>) val;
+ out.writeInt(m.size());
+ Iterator<?> i =
+ m.entrySet().iterator();
+ while (i.hasNext()) {
+ Entry<?, ?> entry = (Entry<?, ?>) i.next();
+ writeDatum(out, entry.getKey());
+ writeDatum(out, entry.getValue());
+ }
+ return;
+
+ case DataType.INTEGER:
+ out.writeByte(DataType.INTEGER);
+ new VIntWritable((Integer) val).write(out);
+ return;
+
+ case DataType.LONG:
+ out.writeByte(DataType.LONG);
+ new VLongWritable((Long) val).write(out);
+ return;
+
+ case DataType.FLOAT:
+ out.writeByte(DataType.FLOAT);
+ out.writeFloat((Float) val);
+ return;
+
+ case DataType.DOUBLE:
+ out.writeByte(DataType.DOUBLE);
+ out.writeDouble((Double) val);
+ return;
+
+ case DataType.BOOLEAN:
+ out.writeByte(DataType.BOOLEAN);
+ out.writeBoolean((Boolean) val);
+ return;
+
+ case DataType.BYTE:
+ out.writeByte(DataType.BYTE);
+ out.writeByte((Byte) val);
+ return;
+
+ case DataType.SHORT:
+ out.writeByte(DataType.SHORT);
+ out.writeShort((Short) val);
+ return;
+
+ case DataType.STRING:
+ String s = (String) val;
+ byte[] utfBytes = s.getBytes(ReaderWriter.UTF8);
+ out.writeByte(DataType.STRING);
+ out.writeInt(utfBytes.length);
+ out.write(utfBytes);
+ return;
+
+ case DataType.BINARY:
+ byte[] ba = (byte[]) val;
+ out.writeByte(DataType.BINARY);
+ out.writeInt(ba.length);
+ out.write(ba);
+ return;
+
+ case DataType.NULL:
+ out.writeByte(DataType.NULL);
+ return;
+
+ default:
+ throw new IOException("Unexpected data type " + type +
+ " found in stream.");
+ }
}
- }
}